From d176994483e46ddadf4c26c8120312b7bd34012c Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 24 Jan 2024 19:58:38 +0100
Subject: [PATCH 001/651] Replace checkIfStorageSupportsSchemaInterface() with
 getStorageFeatures()

Signed-off-by: Azat Khuzhin
---
 src/Databases/DatabaseOnDisk.cpp            | 2 +-
 src/Interpreters/InterpreterCreateQuery.cpp | 2 +-
 src/Storages/StorageFactory.cpp             | 9 +++++++++
 src/Storages/StorageFactory.h               | 8 ++------
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 642a7148487..60434c9029f 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -94,7 +94,7 @@ std::pair<ASTPtr, StoragePtr> createTableFromAST(
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid storage definition in metadata file: "
                                                        "it's a bug or result of manual intervention in metadata files");

-        if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(ast_create_query.storage->engine->name))
+        if (!StorageFactory::instance().getStorageFeatures(ast_create_query.storage->engine->name).supports_schema_inference)
             throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Missing definition of columns.");
         /// Leave columns empty.
     }
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 9ce1c856622..a29e30febaf 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -853,7 +853,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected application state. CREATE query is missing either its storage or engine.");
     /// We can have queries like "CREATE TABLE <table> ENGINE=<engine>" if <engine>
     /// supports schema inference (will determine table structure in it's constructor).
-    else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name))
+    else if (!StorageFactory::instance().getStorageFeatures(create.storage->engine->name).supports_schema_inference)
         throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect CREATE query: required list of column descriptions or AS section or SELECT.");

     /// Even if query has list of columns, canonicalize it (unfold Nested columns).
diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp
index f9bc25ef72c..a33238f3911 100644
--- a/src/Storages/StorageFactory.cpp
+++ b/src/Storages/StorageFactory.cpp
@@ -253,4 +253,13 @@ AccessType StorageFactory::getSourceAccessType(const String & table_engine) cons
     return it->second.features.source_access_type;
 }

+
+const StorageFactory::StorageFeatures & StorageFactory::getStorageFeatures(const String & storage_name) const
+{
+    auto it = storages.find(storage_name);
+    if (it == storages.end())
+        throw Exception(ErrorCodes::UNKNOWN_STORAGE, "Unknown table engine {}", storage_name);
+    return it->second.features;
+}
+
 }
diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h
index 7b1d7235bac..a4ac5c95bd8 100644
--- a/src/Storages/StorageFactory.h
+++ b/src/Storages/StorageFactory.h
@@ -129,12 +129,8 @@ public:

     AccessType getSourceAccessType(const String & table_engine) const;

-    bool checkIfStorageSupportsSchemaInterface(const String & storage_name)
-    {
-        if (storages.contains(storage_name))
-            return storages[storage_name].features.supports_schema_inference;
-        return false;
-    }
+    const StorageFeatures & getStorageFeatures(const String & storage_name) const;
+
 private:
     Storages storages;
 };
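For context, this is the query shape the `supports_schema_inference` check above guards: an engine that can infer the table structure itself may be created without a column list. A minimal sketch (the engine arguments are made up):

    CREATE TABLE t ENGINE = S3('https://example-bucket.s3.amazonaws.com/data.parquet');

Engines without this feature now fail the same check through getStorageFeatures() instead of the removed helper.
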
From 34001b94a4bc72533584833519b1b25907647b7d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 24 Jan 2024 20:42:18 +0100
Subject: [PATCH 002/651] Parse PRIMARY KEY extended syntax for VIEWs

Before this patch, the following syntax was parsed only for CREATE:
- CREATE TABLE x (key INT PRIMARY KEY)
- CREATE TABLE x (key INT, PRIMARY KEY key)
And hence it was never passed to CREATE MATERIALIZED VIEW at all.

Signed-off-by: Azat Khuzhin
---
 src/Parsers/ParserCreateQuery.cpp | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp
index 1f6f68c9d8e..bfd49e8ed2b 100644
--- a/src/Parsers/ParserCreateQuery.cpp
+++ b/src/Parsers/ParserCreateQuery.cpp
@@ -1551,6 +1551,29 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
     if (comment)
         query->set(query->comment, comment);

+    if (query->columns_list && query->columns_list->primary_key)
+    {
+        /// If engine is not set will use default one
+        if (!query->storage)
+            query->set(query->storage, std::make_shared<ASTStorage>());
+        else if (query->storage->primary_key)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed.");
+
+        query->storage->primary_key = query->columns_list->primary_key;
+
+    }
+
+    if (query->columns_list && (query->columns_list->primary_key_from_columns))
+    {
+        /// If engine is not set will use default one
+        if (!query->storage)
+            query->set(query->storage, std::make_shared<ASTStorage>());
+        else if (query->storage->primary_key)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed.");
+
+        query->storage->primary_key = query->columns_list->primary_key_from_columns;
+    }
+
     tryGetIdentifierNameInto(as_database, query->as_database);
     tryGetIdentifierNameInto(as_table, query->as_table);
     query->set(query->select, select);
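For context, the syntax this commit starts accepting for views — a sketch with made-up table names, matching the tests added later in this series:

    CREATE MATERIALIZED VIEW mv (key String, PRIMARY KEY key)
    ENGINE = MergeTree
    AS SELECT key FROM data;

The PRIMARY KEY taken from the column list is carried into the view's storage definition instead of being silently dropped.
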
From c461437e6b0eb0c6a33b6a093cdb1e2a48c2230b Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 24 Jan 2024 20:32:53 +0100
Subject: [PATCH 003/651] Add missing clone for primary_key_from_columns (just
 a style thing)

It does not affect anything, since it is moved into the storage arguments
during parsing, but since primary_key is there, let's do the same (maybe it
will be used as a standalone parser one time...).

Signed-off-by: Azat Khuzhin
---
 src/Parsers/ASTCreateQuery.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp
index 9d5f0bcddbd..ea5db8d6752 100644
--- a/src/Parsers/ASTCreateQuery.cpp
+++ b/src/Parsers/ASTCreateQuery.cpp
@@ -139,6 +139,8 @@ ASTPtr ASTColumns::clone() const
         res->set(res->projections, projections->clone());
     if (primary_key)
         res->set(res->primary_key, primary_key->clone());
+    if (primary_key_from_columns)
+        res->set(res->primary_key_from_columns, primary_key_from_columns->clone());

     return res;
 }

From 8eb528cbe63b5027e78051c3b08029531efa5842 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 24 Jan 2024 20:08:01 +0100
Subject: [PATCH 004/651] Fix passing projections/indexes from CREATE query
 into inner table of MV

Before this patch, any additional syntax, like:
- projections
- data skipping indexes
had not been passed to the inner table (in case the MV had been created
without an explicit TO table).

Yes, this is not a "recommended" way of using it, but still, it should work.

v2: fix tests failures for Ordinary engine
Signed-off-by: Azat Khuzhin
---
 src/Storages/StorageMaterializedView.cpp      | 20 +++++++
 .../02982_create_mv_inner_extra.reference     |  5 ++
 .../02982_create_mv_inner_extra.sql           | 58 +++++++++++++++++++
 3 files changed, 83 insertions(+)
 create mode 100644 tests/queries/0_stateless/02982_create_mv_inner_extra.reference
 create mode 100644 tests/queries/0_stateless/02982_create_mv_inner_extra.sql

diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp
index bfe75e61bcd..e204c916691 100644
--- a/src/Storages/StorageMaterializedView.cpp
+++ b/src/Storages/StorageMaterializedView.cpp
@@ -4,6 +4,7 @@
 #include
 #include
+#include <Storages/StorageFactory.h>
 #include
 #include

@@ -125,6 +126,9 @@ StorageMaterializedView::StorageMaterializedView(
     }
     else
     {
+        const String & engine = query.storage->engine->name;
+        const auto & storage_features = StorageFactory::instance().getStorageFeatures(engine);
+
         /// We will create a query to create an internal table.
         auto create_context = Context::createCopy(local_context);
         auto manual_create_query = std::make_shared<ASTCreateQuery>();
@@ -134,6 +138,22 @@ StorageMaterializedView::StorageMaterializedView(
         auto new_columns_list = std::make_shared<ASTColumns>();
         new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr());

+        if (storage_features.supports_skipping_indices)
+        {
+            if (query.columns_list->indices)
+                new_columns_list->set(new_columns_list->indices, query.columns_list->indices->ptr());
+            if (query.columns_list->constraints)
+                new_columns_list->set(new_columns_list->constraints, query.columns_list->constraints->ptr());
+            if (query.columns_list->primary_key)
+                new_columns_list->set(new_columns_list->primary_key, query.columns_list->primary_key->ptr());
+            if (query.columns_list->primary_key_from_columns)
+                new_columns_list->set(new_columns_list->primary_key_from_columns, query.columns_list->primary_key_from_columns->ptr());
+        }
+        if (storage_features.supports_projections)
+        {
+            if (query.columns_list->projections)
+                new_columns_list->set(new_columns_list->projections, query.columns_list->projections->ptr());
+        }

         manual_create_query->set(manual_create_query->columns_list, new_columns_list);
         manual_create_query->set(manual_create_query->storage, query.storage->ptr());
diff --git a/tests/queries/0_stateless/02982_create_mv_inner_extra.reference b/tests/queries/0_stateless/02982_create_mv_inner_extra.reference
new file mode 100644
index 00000000000..06a60436b2f
--- /dev/null
+++ b/tests/queries/0_stateless/02982_create_mv_inner_extra.reference
@@ -0,0 +1,5 @@
+CREATE TABLE x (`key` String) ENGINE = MergeTree PRIMARY KEY key ORDER BY key SETTINGS index_granularity = 8192
+CREATE TABLE x (`key` String) ENGINE = MergeTree PRIMARY KEY tuple(key) ORDER BY tuple(key) SETTINGS index_granularity = 8192
+CREATE TABLE x (`key` String) ENGINE = Null
+CREATE TABLE x (`key` String, INDEX idx key TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192
+CREATE TABLE x (`key` String, PROJECTION p (SELECT uniqCombined(key))) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192
diff --git a/tests/queries/0_stateless/02982_create_mv_inner_extra.sql b/tests/queries/0_stateless/02982_create_mv_inner_extra.sql
new file mode 100644
index 00000000000..372d61c1683
--- /dev/null
+++ b/tests/queries/0_stateless/02982_create_mv_inner_extra.sql
@@ -0,0 +1,58 @@
+-- Tags: no-random-merge-tree-settings
+
+DROP TABLE IF EXISTS data;
+DROP TABLE IF EXISTS mv_indexes;
+DROP TABLE IF EXISTS mv_no_indexes;
+DROP TABLE IF EXISTS mv_projections;
+DROP TABLE IF EXISTS mv_primary_key;
+DROP TABLE IF EXISTS mv_primary_key_from_column;
+
+CREATE TABLE data
+(
+    key String,
+)
+ENGINE = MergeTree
+ORDER BY key;
+
+CREATE MATERIALIZED VIEW mv_indexes
+(
+    key String,
+    INDEX idx key TYPE bloom_filter GRANULARITY 1
+)
+ENGINE = MergeTree
+ORDER BY key
+AS SELECT * FROM data;
+
+CREATE MATERIALIZED VIEW mv_no_indexes
+(
+    key String,
+    INDEX idx key TYPE bloom_filter GRANULARITY 1
+)
+ENGINE = Null
+AS SELECT * FROM data;
+
+CREATE MATERIALIZED VIEW mv_projections
+(
+    key String,
+    projection p (SELECT uniqCombined(key))
+)
+ENGINE = MergeTree
+ORDER BY key
+AS SELECT * FROM data;
+
+CREATE MATERIALIZED VIEW mv_primary_key
+(
+    key String,
+    PRIMARY KEY key
+)
+ENGINE = MergeTree
+AS SELECT * FROM data;
+
+CREATE MATERIALIZED VIEW mv_primary_key_from_column
+(
+    key String PRIMARY KEY
+)
+ENGINE = MergeTree
+AS SELECT * FROM data;
+
+SELECT replaceRegexpOne(create_table_query, 'CREATE TABLE [^ ]*', 'CREATE TABLE x') FROM system.tables WHERE database = currentDatabase() and table LIKE '.inner%' ORDER BY 1 FORMAT LineAsString;

From 8421f23975ff6155c027dc38d87533a3c14b9acf Mon Sep 17 00:00:00 2001
From: Blargian
Date: Sun, 28 Jan 2024 22:56:47 +0100
Subject: [PATCH 005/651] #56257 - add failing test and new setting for
 parsing TSV files with crlf

---
 src/Core/Settings.h                        |  1 +
 src/Formats/FormatFactory.cpp              |  1 +
 src/Formats/FormatSettings.h               |  1 +
 .../02973_parse_crlf_with_tsv_files.sh     | 23 +++++++++++++++++++
 4 files changed, 26 insertions(+)
 create mode 100755 tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index e0b3ca39899..a62380ad926 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -1012,6 +1012,7 @@ class IColumn;
     M(Bool, input_format_csv_skip_trailing_empty_lines, false, "Skip trailing empty lines in CSV format", 0) \
     M(Bool, input_format_tsv_skip_trailing_empty_lines, false, "Skip trailing empty lines in TSV format", 0) \
     M(Bool, input_format_custom_skip_trailing_empty_lines, false, "Skip trailing empty lines in CustomSeparated format", 0) \
+    M(Bool, input_format_tsv_crlf_end_of_line, false, "If it is set to true, the file function will read TSV format with \\r\\n instead of \\n.", 0) \
     \
     M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \
     \
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 62cbadec4f4..a4a6e1ab83a 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -175,6 +175,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.tsv.try_detect_header = settings.input_format_tsv_detect_header;
     format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines;
     format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns;
+    format_settings.tsv.crlf_end_of_line_input = settings.input_format_tsv_crlf_end_of_line;
     format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
     format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon;
     format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index 30e4dd04513..7231e10a763 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -351,6 +351,7 @@ struct FormatSettings
         bool try_detect_header = true;
         bool skip_trailing_empty_lines = false;
         bool allow_variable_number_of_columns = false;
+        bool crlf_end_of_line_input = false;
     } tsv;

     struct
diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh
new file mode 100755
index 00000000000..6f7308e18a4
--- /dev/null
+++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+
+# Test setup
+touch ${USER_FILES_PATH:?}/02973_data_without_crlf.tsv
+touch ${USER_FILES_PATH:?}/02973_data_with_crlf.tsv
+echo -e 'Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t34\n1971-72_Utah_Stars_season\t2016-10-01\t1' > "$USER_FILES_PATH/02973_data_without_crlf.tsv"
+echo -e 'Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r' > "$USER_FILES_PATH/02973_data_with_crlf.tsv"
+
+$CLICKHOUSE_CLIENT --multiquery "SELECT * FROM file(02973_data_without_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');"
+$CLICKHOUSE_CLIENT --multiquery "SELECT * FROM file(02973_data_with_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{clientError 117}"
+
+# Change setting to escape \r
+$CLICKHOUSE_CLIENT --multiquery "SELECT * FROM file(02973_data_with_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');"
+
+# Test teardown
+rm "$USER_FILES_PATH/02973_data_without_crlf.tsv"
+rm "$USER_FILES_PATH/02973_data_with_crlf.tsv"

From 7ae202376f29e56a9dc82ad911155ab451c0317b Mon Sep 17 00:00:00 2001
From: Blargian
Date: Wed, 31 Jan 2024 21:03:06 +0100
Subject: [PATCH 006/651] missed place for documentation change

---
 docs/en/operations/settings/settings-formats.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index eb09af44efd..e5c555af018 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -823,7 +823,13 @@ Default value: `0`.

 ### output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line}

-Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF).
+Use DOS/Windows-style line separator (CRLF) in TSV instead of Unix style (LF).
+
+Disabled by default.
+
+### input_format_tsv_crlf_end_of_line {#input_format_tsv_crlf_end_of_line}
+
+Use DOS/Windows-style line separator (CRLF) for TSV input files instead of Unix style (LF).

 Disabled by default.
From 31416bc4885a5d6302e8e59235921cc018b121b4 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Wed, 31 Jan 2024 21:03:47 +0100
Subject: [PATCH 007/651] 2 tests fail - not sure if related to changes, try
 again


From ab384f86527641a6a9c28179fe995e957072e157 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Sun, 4 Feb 2024 15:29:57 +0100
Subject: [PATCH 008/651] add support_crlf for TSV format

---
 .../SerializationFixedString.cpp              |  8 +++-
 .../Serializations/SerializationNullable.cpp  |  5 +-
 .../Serializations/SerializationString.cpp    |  8 +++-
 src/Formats/EscapingRuleUtils.cpp             |  4 +-
 src/IO/ReadHelpers.cpp                        | 46 +++++++++++++++----
 src/IO/ReadHelpers.h                          |  6 ++-
 .../Formats/Impl/TSKVRowInputFormat.cpp       |  2 +-
 .../Impl/TabSeparatedRowInputFormat.cpp       | 11 +++--
 8 files changed, 68 insertions(+), 22 deletions(-)

diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp
index fa50af52f2f..cf731409fd0 100644
--- a/src/DataTypes/Serializations/SerializationFixedString.cpp
+++ b/src/DataTypes/Serializations/SerializationFixedString.cpp
@@ -151,9 +151,13 @@ static inline void read(const SerializationFixedString & self, IColumn & column,
 }


-void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
 {
-    read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); });
+    read(*this, column, [&istr, &settings](ColumnFixedString::Chars & data)
+    {
+        settings.tsv.crlf_end_of_line_input ? readEscapedStringInto<ColumnFixedString::Chars, true>(data, istr)
+                                            : readEscapedStringInto<ColumnFixedString::Chars, false>(data, istr);
+    });
 }


diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp
index 4b0ad0b54ba..c0fbdfbb022 100644
--- a/src/DataTypes/Serializations/SerializationNullable.cpp
+++ b/src/DataTypes/Serializations/SerializationNullable.cpp
@@ -290,6 +290,7 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
     const SerializationPtr & nested_serialization)
 {
     const String & null_representation = settings.tsv.null_representation;
+    const bool supports_crlf = settings.tsv.crlf_end_of_line_input;

     /// Some data types can deserialize absence of data (e.g. empty string), so eof is ok.
     if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0]))
@@ -309,10 +310,10 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
         /// Check if we have enough data in buffer to check if it's a null.
         if (istr.available() > null_representation.size())
         {
-            auto check_for_null = [&istr, &null_representation]()
+            auto check_for_null = [&istr, &null_representation, &supports_crlf]()
             {
                 auto * pos = istr.position();
-                if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n'))
+                if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n' || (supports_crlf && *istr.position() == '\r')))
                     return true;
                 istr.position() = pos;
                 return false;
diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp
index b2b083fd466..4ff0ba9a400 100644
--- a/src/DataTypes/Serializations/SerializationString.cpp
+++ b/src/DataTypes/Serializations/SerializationString.cpp
@@ -301,9 +301,13 @@ void SerializationString::deserializeWholeText(IColumn & column, ReadBuffer & is
 }


-void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
 {
-    read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); });
+    read(column, [&](ColumnString::Chars & data)
+    {
+        settings.tsv.crlf_end_of_line_input ? readEscapedStringInto<PaddedPODArray<UInt8>,true>(data, istr)
+                                            : readEscapedStringInto<PaddedPODArray<UInt8>,false>(data, istr);
+    });
 }


diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp
index a7e9fb8e99f..481696edc49 100644
--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@@ -76,7 +76,7 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca
             /// Empty field, just skip spaces
             break;
         case FormatSettings::EscapingRule::Escaped:
-            readEscapedStringInto(out, buf);
+            readEscapedStringInto<NullOutput,false>(out, buf);
             break;
         case FormatSettings::EscapingRule::Quoted:
             readQuotedFieldInto(out, buf);
@@ -236,7 +236,7 @@ String readByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escapin
             if constexpr (read_string)
                 readEscapedString(result, buf);
             else
-                readTSVField(result, buf);
+                readTSVField<false>(result, buf);
             break;
         default:
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read value with {} escaping rule", escapingRuleToString(escaping_rule));
diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp
index 05d35a57b12..90168325d99 100644
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@@ -496,13 +496,19 @@ static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf)
 }


-template <typename Vector, bool parse_complex_escape_sequences>
+template <typename Vector, bool parse_complex_escape_sequences, bool support_crlf>
 void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf)
 {
     while (!buf.eof())
     {
-        char * next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end());
-
+        char * next_pos;
+        if constexpr (support_crlf)
+        {
+            next_pos = find_first_symbols<'\t', '\n', '\\','\r'>(buf.position(), buf.buffer().end());
+        } else {
+            next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end());
+        }
+
         appendToStringOrVector(s, buf, next_pos);
         buf.position() = next_pos;

@@ -529,25 +535,41 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf)
                 }
             }
         }
+
+        if (*buf.position() == '\r')
+        {
+            ++buf.position(); // advance to \n after \r
+        }
     }
 }

-template <typename Vector>
+template <typename Vector, bool support_crlf>
 void readEscapedStringInto(Vector & s, ReadBuffer & buf)
 {
-    readEscapedStringIntoImpl<Vector, true>(s, buf);
+    readEscapedStringIntoImpl<Vector, true, support_crlf>(s, buf);
 }

 void readEscapedString(String & s, ReadBuffer & buf)
 {
     s.clear();
-    readEscapedStringInto(s, buf);
+    readEscapedStringInto<String,false>(s, buf);
 }

-template void readEscapedStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
-template void readEscapedStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf);
+template <bool support_crlf>
+void readEscapedStringCRLF(String & s, ReadBuffer & buf)
+{
+    s.clear();
+    readEscapedStringInto<String,support_crlf>(s, buf);
+}

+template void readEscapedStringInto<PaddedPODArray<UInt8>,false>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
+template void readEscapedStringInto<NullOutput,false>(NullOutput & s, ReadBuffer & buf);
+template void readEscapedStringInto<PaddedPODArray<UInt8>,true>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
+template void readEscapedStringInto<NullOutput,true>(NullOutput & s, ReadBuffer & buf);
+
+template void readEscapedStringCRLF<true>(String & s, ReadBuffer & buf);
+template void readEscapedStringCRLF<false>(String & s, ReadBuffer & buf);

 /** If enable_sql_style_quoting == true,
  *  strings like 'abc''def' will be parsed as abc'def.
@@ -1761,10 +1783,16 @@ void readJSONField(String & s, ReadBuffer & buf)
     readParsedValueInto(s, buf, parse_func);
 }

+template <bool support_crlf>
 void readTSVField(String & s, ReadBuffer & buf)
 {
     s.clear();
-    readEscapedStringIntoImpl<String, false>(s, buf);
+    readEscapedStringIntoImpl<String, false, support_crlf>(s, buf);
 }

+template void readTSVField<true>(String & s, ReadBuffer & buf);
+template void readTSVField<false>(String & s, ReadBuffer & buf);
+
 }
+
+
diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h
index 85584d63ee8..5ee56201035 100644
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@@ -548,6 +548,9 @@ void readString(String & s, ReadBuffer & buf);

 void readEscapedString(String & s, ReadBuffer & buf);

+template <bool support_crlf>
+void readEscapedStringCRLF(String & s, ReadBuffer & buf);
+
 void readQuotedString(String & s, ReadBuffer & buf);
 void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
@@ -601,7 +604,7 @@ void readStringInto(Vector & s, ReadBuffer & buf);
 template <typename Vector>
 void readNullTerminated(Vector & s, ReadBuffer & buf);

-template <typename Vector>
+template <typename Vector, bool support_crlf>
 void readEscapedStringInto(Vector & s, ReadBuffer & buf);

 template
@@ -1757,6 +1760,7 @@ void readQuotedField(String & s, ReadBuffer & buf);

 void readJSONField(String & s, ReadBuffer & buf);

+template <bool support_crlf>
 void readTSVField(String & s, ReadBuffer & buf);

 /** Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters).
diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
index 432e944a246..d59b5cdd2d0 100644
--- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
@@ -134,7 +134,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex
                 /// If the key is not found, skip the value.
                 NullOutput sink;
-                readEscapedStringInto(sink, *in);
+                readEscapedStringInto<NullOutput,false>(sink, *in);
             }
             else
             {
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
index 6f6dae334e5..afd91e913d2 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include "Formats/FormatSettings.h"

 namespace DB
 {
@@ -105,14 +106,17 @@ template
 String TabSeparatedFormatReader::readFieldIntoString()
 {
     String field;
+    bool support_crlf = format_settings.tsv.crlf_end_of_line_input;
     if (is_raw)
         readString(field, *buf);
     else
     {
         if constexpr (read_string)
-            readEscapedString(field, *buf);
+            support_crlf ? readEscapedStringCRLF<true>(field, *buf)
+                         : readEscapedStringCRLF<false>(field, *buf);
         else
-            readTSVField(field, *buf);
+            support_crlf ? readTSVField<true>(field, *buf)
+                         : readTSVField<false>(field, *buf);
     }
     return field;
 }
@@ -123,7 +127,8 @@ void TabSeparatedFormatReader::skipField()
     if (is_raw)
         readStringInto(out, *buf);
     else
-        readEscapedStringInto(out, *buf);
+        format_settings.tsv.crlf_end_of_line_input ? readEscapedStringInto<NullOutput,true>(out, *buf)
+                                                   : readEscapedStringInto<NullOutput,false>(out, *buf);
 }

 void TabSeparatedFormatReader::skipHeaderRow()

From a12d8d749dc660da64c34188cff4dbc2d33946a8 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Tue, 6 Feb 2024 17:17:24 +0100
Subject: [PATCH 009/651] modify skipRowEndDelimiter for \r

---
 .../Serializations/SerializationNullable.cpp       |  5 ++---
 .../Formats/Impl/TabSeparatedRowInputFormat.cpp    | 11 ++++++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp
index c0fbdfbb022..4b0ad0b54ba 100644
--- a/src/DataTypes/Serializations/SerializationNullable.cpp
+++ b/src/DataTypes/Serializations/SerializationNullable.cpp
@@ -290,7 +290,6 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
     const SerializationPtr & nested_serialization)
 {
     const String & null_representation = settings.tsv.null_representation;
-    const bool supports_crlf = settings.tsv.crlf_end_of_line_input;

     /// Some data types can deserialize absence of data (e.g. empty string), so eof is ok.
     if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0]))
@@ -310,10 +309,10 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
         /// Check if we have enough data in buffer to check if it's a null.
         if (istr.available() > null_representation.size())
         {
-            auto check_for_null = [&istr, &null_representation, &supports_crlf]()
+            auto check_for_null = [&istr, &null_representation]()
             {
                 auto * pos = istr.position();
-                if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n' || (supports_crlf && *istr.position() == '\r')))
+                if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n'))
                     return true;
                 istr.position() = pos;
                 return false;
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
index afd91e913d2..5a94a505984 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@@ -23,9 +23,13 @@ namespace ErrorCodes

 /** Check for a common error case - usage of Windows line feed.
   */
+template <bool supports_crlf>
 static void checkForCarriageReturn(ReadBuffer & in)
 {
-    if (!in.eof() && (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r')))
+    bool crlf_escaped = false;
+    if constexpr (supports_crlf)
+        crlf_escaped = true;
+    if (!in.eof() && (in.position()[0] == '\r' || (crlf_escaped ? false : (in.position() != in.buffer().begin() && in.position()[-1] == '\r'))))
         throw Exception(ErrorCodes::INCORRECT_DATA, "\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row."
             "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format."
             " You must transform your file to Unix format."
@@ -90,12 +94,13 @@ void TabSeparatedFormatReader::skipFieldDelimiter()

 void TabSeparatedFormatReader::skipRowEndDelimiter()
 {
+    bool supports_crfl = format_settings.tsv.crlf_end_of_line_input;
     if (buf->eof())
         return;

     if (unlikely(first_row))
-    {
-        checkForCarriageReturn(*buf);
+    {
+        supports_crfl ? checkForCarriageReturn<true>(*buf) : checkForCarriageReturn<false>(*buf);
         first_row = false;
     }

From a2dfc4856712ad8003eef902d33bafb3f47cc6aa Mon Sep 17 00:00:00 2001
From: Blargian
Date: Thu, 8 Feb 2024 07:41:50 +0100
Subject: [PATCH 010/651] change typo crfl to crlf in skipRowEndDelimiter
 function

---
 src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
index 3f18aad3bd1..dbd939effe1 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@@ -95,16 +95,16 @@ void TabSeparatedFormatReader::skipFieldDelimiter()

 void TabSeparatedFormatReader::skipRowEndDelimiter()
 {
-    bool supports_crfl = format_settings.tsv.crlf_end_of_line_input;
+    bool supports_crlf = format_settings.tsv.crlf_end_of_line_input;
     if (buf->eof())
         return;
-    if (supports_crfl && first_row==false)
+    if (supports_crlf && first_row==false)
     {
         ++buf->position();
     }
     if (unlikely(first_row))
     {
-        supports_crfl ? checkForCarriageReturn<true>(*buf) : checkForCarriageReturn<false>(*buf);
+        supports_crlf ? checkForCarriageReturn<true>(*buf) : checkForCarriageReturn<false>(*buf);
         first_row = false;
     }
     assertChar('\n', *buf);

From 04abd62288a55a0d6b3a315e08a6410a39e70199 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Thu, 8 Feb 2024 07:43:41 +0100
Subject: [PATCH 011/651] rename reference file to fix typo of crfl to crlf

---
 ..._files.reference => 02973_parse_crlf_with_tsv_files.reference} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{02973_parse_crfl_with_tsv_files.reference => 02973_parse_crlf_with_tsv_files.reference} (100%)

diff --git a/tests/queries/0_stateless/02973_parse_crfl_with_tsv_files.reference b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference
similarity index 100%
rename from tests/queries/0_stateless/02973_parse_crfl_with_tsv_files.reference
rename to tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference

From d53632d61ea85040572c7f4f449e48b54737090d Mon Sep 17 00:00:00 2001
From: Blargian
Date: Thu, 8 Feb 2024 07:50:13 +0100
Subject: [PATCH 012/651] update SettingsChangesHistory

---
 src/Core/SettingsChangesHistory.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index db3a76e29cd..8b918c1c064 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -90,7 +90,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
     {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"},
     {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"},
     {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"},
-    {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}}},
+    {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"},
+    {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}}},
     {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."},
     {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"},
     {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"},

From debc804b777ec8c0355b29d9f325defd461e5e63 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Thu, 8 Feb 2024 08:10:53 +0100
Subject: [PATCH 013/651] documentation changes

---
 docs/en/interfaces/formats.md | 1 +
 docs/ru/interfaces/formats.md | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index a11c3e5ef19..0a5a9c6a076 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -195,6 +195,7 @@ SELECT * FROM nestedt FORMAT TSV
 - [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`.
 - [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`.
 - [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`.
+- [input_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV input format will be `\r\n` instead of `\n`. Default value - `false`.
 - [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`.
 - [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`.
 - [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index b4794b02743..c4892c74515 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -119,6 +119,7 @@ Hello\nworld
 Hello\
 world
 ```
+`\r\n` (CRLF) поддерживается с помощью настройки `input_format_tsv_crlf_end_of_line`.

 Второй вариант поддерживается, так как его использует MySQL при записи tab-separated дампа.
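For context, the input setting pairs with the existing output setting to allow a CRLF round trip — a sketch with hypothetical file paths:

    INSERT INTO FUNCTION file('crlf_demo.tsv', 'TabSeparated', 'key String')
    SELECT 'value' SETTINGS output_format_tsv_crlf_end_of_line = 1;

    SELECT * FROM file('crlf_demo.tsv', 'TabSeparated', 'key String')
    SETTINGS input_format_tsv_crlf_end_of_line = 1;
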
From 3cca8410385c216ced1c9366a8e8cda8503f3407 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Fri, 9 Feb 2024 18:55:21 +0100
Subject: [PATCH 014/651] Unite s3/hdfs/azure storage implementations into a
 single class on top of IObjectStorage

---
 src/Backups/BackupIO_AzureBlobStorage.cpp     |   26 +-
 src/Backups/BackupIO_AzureBlobStorage.h       |   46 +-
 .../registerBackupEngineAzureBlobStorage.cpp  |   18 +-
 src/CMakeLists.txt                            |    1 +
 .../AzureBlobStorage/AzureObjectStorage.cpp   |    8 +-
 ...jectStorageRemoteMetadataRestoreHelper.cpp |   14 +-
 src/Disks/ObjectStorages/IObjectStorage.h     |    5 +-
 src/Disks/ObjectStorages/IObjectStorage_fwd.h |    3 +
 .../MetadataStorageFromPlainObjectStorage.cpp |    2 +-
 .../ObjectStorages/ObjectStorageIterator.cpp  |    2 +-
 .../ObjectStorages/ObjectStorageIterator.h    |   22 +-
 .../ObjectStorageIteratorAsync.cpp            |    4 +-
 .../ObjectStorageIteratorAsync.h              |    4 +-
 .../ObjectStorages/S3/S3ObjectStorage.cpp     |   26 +-
 .../copyAzureBlobStorageFile.h                |    3 +-
 src/Interpreters/InterpreterSystemQuery.cpp   |    6 +-
 src/Server/TCPHandler.cpp                     |    2 +-
 .../DataLakes/DeltaLakeMetadataParser.cpp     |   87 +-
 .../DataLakes/DeltaLakeMetadataParser.h       |   10 +-
 src/Storages/DataLakes/HudiMetadataParser.cpp |  181 +-
 src/Storages/DataLakes/HudiMetadataParser.h   |   15 +-
 src/Storages/DataLakes/IStorageDataLake.h     |  144 +-
 .../DataLakes/Iceberg/IcebergMetadata.cpp     |   65 +-
 .../DataLakes/Iceberg/IcebergMetadata.h       |   27 +-
 .../DataLakes/Iceberg/StorageIceberg.cpp      |   79 -
 .../DataLakes/Iceberg/StorageIceberg.h        |  117 +-
 src/Storages/DataLakes/S3MetadataReader.cpp   |   86 -
 src/Storages/DataLakes/S3MetadataReader.h     |   25 -
 src/Storages/DataLakes/StorageDeltaLake.h     |    7 +-
 src/Storages/DataLakes/StorageHudi.h          |    7 +-
 src/Storages/DataLakes/registerDataLakes.cpp  |   38 +-
 src/Storages/HDFS/StorageHDFS.cpp             | 1117 ----------
 src/Storages/HDFS/StorageHDFS.h               |  179 --
 src/Storages/HDFS/StorageHDFSCluster.cpp      |   98 -
 src/Storages/HDFS/StorageHDFSCluster.h        |   56 -
 src/Storages/IStorage.h                       |    9 +-
 .../ObjectStorage/AzureConfiguration.cpp      |  451 ++++
 .../ObjectStorage/AzureConfiguration.h        |   54 +
 src/Storages/ObjectStorage/Configuration.h    |   55 +
 .../ObjectStorage/HDFSConfiguration.h         |   81 +
 .../ObjectStorage/ReadBufferIterator.h        |  197 ++
 .../ObjectStorage/ReadFromObjectStorage.h     |  105 +
 .../ObjectStorage/S3Configuration.cpp         |  491 +++++
 src/Storages/ObjectStorage/S3Configuration.h  |   46 +
 src/Storages/ObjectStorage/Settings.h         |   86 +
 .../ObjectStorage/StorageObjectStorage.cpp    |  303 +++
 .../ObjectStorage/StorageObjectStorage.h      |  116 +
 .../StorageObjectStorageCluster.cpp           |  107 +
 .../StorageObjectStorageCluster.h             |   72 +
 .../ObjectStorage/StorageObjectStorageSink.h  |  155 ++
 .../StorageObjectStorageSource.cpp            |  464 ++++
 .../StorageObjectStorageSource.h              |  217 ++
 .../registerStorageObjectStorage.cpp          |  166 ++
 src/Storages/ObjectStorageConfiguration.h     |    0
 src/Storages/S3Queue/S3QueueSource.cpp        |   85 +-
 src/Storages/S3Queue/S3QueueSource.h          |   42 +-
 src/Storages/S3Queue/S3QueueTableMetadata.cpp |    3 +-
 src/Storages/S3Queue/S3QueueTableMetadata.h   |    7 +-
 src/Storages/S3Queue/StorageS3Queue.cpp       |  101 +-
 src/Storages/S3Queue/StorageS3Queue.h         |   14 +-
 src/Storages/StorageAzureBlob.cpp             | 1478 -------------
 src/Storages/StorageAzureBlob.h               |  339 ---
 src/Storages/StorageAzureBlobCluster.cpp      |   89 -
 src/Storages/StorageAzureBlobCluster.h        |   56 -
 src/Storages/StorageS3.cpp                    | 1905 -----------------
 src/Storages/StorageS3.h                      |  399 ----
 src/Storages/StorageS3Cluster.cpp             |  103 -
 src/Storages/StorageS3Cluster.h               |   58 -
 .../StorageSystemSchemaInferenceCache.cpp     |    6 +-
 src/Storages/registerStorages.cpp             |   17 +-
 src/TableFunctions/ITableFunctionCluster.h    |    6 +-
 src/TableFunctions/ITableFunctionDataLake.h   |   22 +-
 .../TableFunctionAzureBlobStorage.cpp         |  323 ---
 .../TableFunctionAzureBlobStorage.h           |   80 -
 .../TableFunctionAzureBlobStorageCluster.cpp  |   85 -
 .../TableFunctionAzureBlobStorageCluster.h    |   55 -
 src/TableFunctions/TableFunctionDeltaLake.cpp |   24 +-
 src/TableFunctions/TableFunctionHDFS.cpp      |   54 -
 src/TableFunctions/TableFunctionHDFS.h        |   50 -
 .../TableFunctionHDFSCluster.cpp              |   61 -
 src/TableFunctions/TableFunctionHDFSCluster.h |   54 -
 src/TableFunctions/TableFunctionHudi.cpp      |   24 +-
 src/TableFunctions/TableFunctionIceberg.cpp   |    7 +-
 .../TableFunctionObjectStorage.cpp            |  224 ++
 .../TableFunctionObjectStorage.h              |  150 ++
 .../TableFunctionObjectStorageCluster.cpp     |  113 +
 .../TableFunctionObjectStorageCluster.h       |   91 +
 src/TableFunctions/TableFunctionS3.cpp        |  464 ----
 src/TableFunctions/TableFunctionS3.h          |   86 -
 src/TableFunctions/TableFunctionS3Cluster.cpp |   74 -
 src/TableFunctions/TableFunctionS3Cluster.h   |   64 -
 src/TableFunctions/registerTableFunctions.cpp |   23 +-
 src/TableFunctions/registerTableFunctions.h   |    9 +-
 .../test_storage_azure_blob_storage/test.py   |    8 +-
 94 files changed, 4403 insertions(+), 8155 deletions(-)
 delete mode 100644 src/Storages/DataLakes/S3MetadataReader.cpp
 delete mode 100644 src/Storages/DataLakes/S3MetadataReader.h
 delete mode 100644 src/Storages/HDFS/StorageHDFS.cpp
 delete mode 100644 src/Storages/HDFS/StorageHDFS.h
 delete mode 100644 src/Storages/HDFS/StorageHDFSCluster.cpp
 delete mode 100644 src/Storages/HDFS/StorageHDFSCluster.h
 create mode 100644 src/Storages/ObjectStorage/AzureConfiguration.cpp
 create mode 100644 src/Storages/ObjectStorage/AzureConfiguration.h
 create mode 100644 src/Storages/ObjectStorage/Configuration.h
 create mode 100644 src/Storages/ObjectStorage/HDFSConfiguration.h
 create mode 100644 src/Storages/ObjectStorage/ReadBufferIterator.h
 create mode 100644 src/Storages/ObjectStorage/ReadFromObjectStorage.h
 create mode 100644 src/Storages/ObjectStorage/S3Configuration.cpp
 create mode 100644 src/Storages/ObjectStorage/S3Configuration.h
 create mode 100644 src/Storages/ObjectStorage/Settings.h
 create mode 100644 src/Storages/ObjectStorage/StorageObjectStorage.cpp
 create mode 100644 src/Storages/ObjectStorage/StorageObjectStorage.h
 create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp
 create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageCluster.h
 create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageSink.h
 create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
 create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageSource.h
 create mode 100644 src/Storages/ObjectStorage/registerStorageObjectStorage.cpp
 create mode 100644 src/Storages/ObjectStorageConfiguration.h
 delete mode 100644 src/Storages/StorageAzureBlob.cpp
 delete mode 100644 src/Storages/StorageAzureBlob.h
 delete mode 100644 src/Storages/StorageAzureBlobCluster.cpp
 delete mode 100644 src/Storages/StorageAzureBlobCluster.h
 delete mode 100644 src/Storages/StorageS3.cpp
 delete mode 100644 src/Storages/StorageS3.h
 delete mode 100644 src/Storages/StorageS3Cluster.cpp
 delete mode 100644 src/Storages/StorageS3Cluster.h
 delete mode 100644 src/TableFunctions/TableFunctionAzureBlobStorage.cpp
 delete mode 100644 src/TableFunctions/TableFunctionAzureBlobStorage.h
 delete mode 100644 src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp
 delete mode 100644 src/TableFunctions/TableFunctionAzureBlobStorageCluster.h
 delete mode 100644 src/TableFunctions/TableFunctionHDFS.cpp
 delete mode 100644 src/TableFunctions/TableFunctionHDFS.h
 delete mode 100644 src/TableFunctions/TableFunctionHDFSCluster.cpp
 delete mode 100644 src/TableFunctions/TableFunctionHDFSCluster.h
 create mode 100644 src/TableFunctions/TableFunctionObjectStorage.cpp
 create mode 100644 src/TableFunctions/TableFunctionObjectStorage.h
 create mode 100644 src/TableFunctions/TableFunctionObjectStorageCluster.cpp
 create mode 100644 src/TableFunctions/TableFunctionObjectStorageCluster.h
 delete mode 100644 src/TableFunctions/TableFunctionS3.cpp
 delete mode 100644 src/TableFunctions/TableFunctionS3.h
 delete mode 100644 src/TableFunctions/TableFunctionS3Cluster.cpp
 delete mode 100644 src/TableFunctions/TableFunctionS3Cluster.h

diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp
index 52ce20d5108..dc636f90be7 100644
--- a/src/Backups/BackupIO_AzureBlobStorage.cpp
+++ b/src/Backups/BackupIO_AzureBlobStorage.cpp
@@ -6,7 +6,6 @@
 #include
 #include
 #include
-#include <Storages/StorageAzureBlob.h>
 #include
 #include
 #include
@@ -29,7 +28,7 @@ namespace ErrorCodes
 }

 BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
-    StorageAzureBlob::Configuration configuration_,
+    const StorageAzureBlobConfiguration & configuration_,
     const ReadSettings & read_settings_,
     const WriteSettings & write_settings_,
     const ContextPtr & context_)
@@ -37,10 +36,10 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
     , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
     , configuration(configuration_)
 {
-    auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
+    auto client_ptr = configuration.createClient(/* is_read_only */ false);
     object_storage = std::make_unique<AzureObjectStorage>("BackupReaderAzureBlobStorage",
                                                           std::move(client_ptr),
-                                                          StorageAzureBlob::createSettings(context_),
+                                                          configuration.createSettings(context_),
                                                           configuration_.container);
     client = object_storage->getAzureBlobStorageClient();
     settings = object_storage->getSettings();
@@ -137,7 +136,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup,

 BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
-    StorageAzureBlob::Configuration configuration_,
+    const StorageAzureBlobConfiguration & configuration_,
     const ReadSettings & read_settings_,
     const WriteSettings & write_settings_,
     const ContextPtr & context_)
@@ -145,17 +144,22 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
     , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
     , configuration(configuration_)
 {
-    auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
+    auto client_ptr = configuration.createClient(/* is_read_only */ false);
     object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
                                                           std::move(client_ptr),
-                                                          StorageAzureBlob::createSettings(context_),
-                                                          configuration_.container);
+                                                          configuration.createSettings(context_),
+                                                          configuration.container);
     client = object_storage->getAzureBlobStorageClient();
     settings = object_storage->getSettings();
 }

-void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
-                                                    bool copy_encrypted, UInt64 start_pos, UInt64 length)
+void BackupWriterAzureBlobStorage::copyFileFromDisk(
+    const String & path_in_backup,
+    DiskPtr src_disk,
+    const String & src_path,
+    bool copy_encrypted,
+    UInt64 start_pos,
+    UInt64 length)
 {
     /// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible.
     auto source_data_source_description = src_disk->getDataSourceDescription();
@@ -241,7 +245,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
     object_storage->listObjects(key,children,/*max_keys*/0);
     if (children.empty())
         throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist");
-    return children[0].metadata.size_bytes;
+    return children[0]->metadata.size_bytes;
 }

 std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h
index 95325044a62..99002c53769 100644
--- a/src/Backups/BackupIO_AzureBlobStorage.h
+++ b/src/Backups/BackupIO_AzureBlobStorage.h
@@ -5,8 +5,8 @@
 #if USE_AZURE_BLOB_STORAGE
 #include
 #include
-#include <Storages/StorageAzureBlob.h>
 #include
+#include <Storages/ObjectStorage/AzureConfiguration.h>


 namespace DB
 {
@@ -16,20 +16,30 @@ namespace DB
 class BackupReaderAzureBlobStorage : public BackupReaderDefault
 {
 public:
-    BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
+    BackupReaderAzureBlobStorage(
+        const StorageAzureBlobConfiguration & configuration_,
+        const ReadSettings & read_settings_,
+        const WriteSettings & write_settings_,
+        const ContextPtr & context_);
+
     ~BackupReaderAzureBlobStorage() override;

     bool fileExists(const String & file_name) override;
     UInt64 getFileSize(const String & file_name) override;
     std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;

-    void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
-                        DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
+    void copyFileToDisk(
+        const String & path_in_backup,
+        size_t file_size,
+        bool encrypted_in_backup,
+        DiskPtr destination_disk,
+        const String & destination_path,
+        WriteMode write_mode) override;

 private:
     const DataSourceDescription data_source_description;
     std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
-    StorageAzureBlob::Configuration configuration;
+    StorageAzureBlobConfiguration configuration;
     std::unique_ptr<AzureObjectStorage> object_storage;
     std::shared_ptr<const AzureObjectStorageSettings> settings;
 };

@@ -37,16 +47,31 @@ private:
 class BackupWriterAzureBlobStorage : public BackupWriterDefault
 {
 public:
-    BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
+    BackupWriterAzureBlobStorage(
+        const StorageAzureBlobConfiguration & configuration_,
+        const ReadSettings & read_settings_,
+        const WriteSettings & write_settings_,
+        const ContextPtr & context_);
+
     ~BackupWriterAzureBlobStorage() override;

     bool fileExists(const String & file_name) override;
     UInt64 getFileSize(const String & file_name) override;
     std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;

-    void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
-    void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
-                          bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
+    void copyDataToFile(
+        const String & path_in_backup,
+        const CreateReadBufferFunction & create_read_buffer,
+        UInt64 start_pos,
+        UInt64 length) override;
+
+    void copyFileFromDisk(
+        const String & path_in_backup,
+        DiskPtr src_disk,
+        const String & src_path,
+        bool copy_encrypted,
+        UInt64 start_pos,
+        UInt64 length) override;

     void copyFile(const String & destination, const String & source, size_t size) override;

@@ -56,9 +81,10 @@ public:
 private:
     std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
     void removeFilesBatch(const Strings & file_names);
+
    const DataSourceDescription data_source_description;
    std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
-    StorageAzureBlob::Configuration configuration;
+    StorageAzureBlobConfiguration configuration;
    std::unique_ptr<AzureObjectStorage> object_storage;
    std::shared_ptr<const AzureObjectStorageSettings> settings;
 };
diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp
index 48f66569304..9408c7ccdcf 100644
--- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp
+++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp
@@ -5,11 +5,11 @@
 #if USE_AZURE_BLOB_STORAGE
 #include
-#include <Storages/StorageAzureBlob.h>
 #include
 #include
 #include
 #include
+#include <Storages/ObjectStorage/AzureConfiguration.h>
 #include
 #endif
@@ -49,7 +49,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
         const String & id_arg = params.backup_info.id_arg;
         const auto & args = params.backup_info.args;

-        StorageAzureBlob::Configuration configuration;
+        StorageAzureBlobConfiguration configuration;

         if (!id_arg.empty())
         {
@@ -59,6 +59,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
             if (!config.has(config_prefix))
                 throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg);

+            if (!config.has(config_prefix))
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no `{}` in config", config_prefix);
+
             if (config.has(config_prefix + ".connection_string"))
             {
                 configuration.connection_url = config.getString(config_prefix + ".connection_string");
@@ -75,10 +78,11 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
             }

             if (args.size() > 1)
-                throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
+                throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                                "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");

             if (args.size() == 1)
-                configuration.blob_path = args[0].safeGet<String>();
+                configuration.setPath(args[0].safeGet<String>());

         }
         else
@@ -110,12 +114,14 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
         }

         BackupImpl::ArchiveParams archive_params;
-        if (hasRegisteredArchiveFileExtension(configuration.blob_path))
+        if (hasRegisteredArchiveFileExtension(configuration.getPath()))
         {
             if (params.is_internal_backup)
                 throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled");

-            archive_params.archive_name = removeFileNameFromURL(configuration.blob_path);
+            auto path = configuration.getPath();
+            configuration.setPath(removeFileNameFromURL(path));
+            archive_params.archive_name = configuration.getPath();
             archive_params.compression_method = params.compression_method;
             archive_params.compression_level = params.compression_level;
             archive_params.password = params.password;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 08913ed1b5a..50130e6abd0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -119,6 +119,7 @@ endif()

 add_headers_and_sources(dbms Storages/DataLakes)
 add_headers_and_sources(dbms Storages/DataLakes/Iceberg)
+add_headers_and_sources(dbms Storages/ObjectStorage)
 add_headers_and_sources(dbms Common/NamedCollections)

 if (TARGET ch_contrib::amqp_cpp)
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
index 74389aedb64..2ca44137442 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
@@ -65,14 +65,14 @@ private:

         for (const auto & blob : blobs_list)
         {
-            batch.emplace_back(
+            batch.emplace_back(std::make_shared<RelativePathWithMetadata>(
                 blob.Name,
                 ObjectMetadata{
                     static_cast<uint64_t>(blob.BlobSize),
                     Poco::Timestamp::fromEpochTime(
                         std::chrono::duration_cast<std::chrono::seconds>(
                             static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
-                    {}});
+                    {}}));
         }

         if (!blob_list_response.NextPageToken.HasValue() || blob_list_response.NextPageToken.Value().empty())
@@ -156,14 +156,14 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith

     for (const auto & blob : blobs_list)
     {
-        children.emplace_back(
+        children.emplace_back(std::make_shared<RelativePathWithMetadata>(
             blob.Name,
             ObjectMetadata{
                 static_cast<uint64_t>(blob.BlobSize),
                 Poco::Timestamp::fromEpochTime(
                     std::chrono::duration_cast<std::chrono::seconds>(
                         static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
-                {}});
+                {}}));
     }

     if (max_keys)
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
index 0314e0a7e92..cc9ee3db505 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
@@ -363,18 +363,18 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage *

     for (const auto & object : objects)
     {
-        LOG_INFO(disk->log, "Calling restore for key for disk {}", object.relative_path);
+        LOG_INFO(disk->log, "Calling restore for key for disk {}", object->relative_path);

         /// Skip file operations objects. They will be processed separately.
-        if (object.relative_path.find("/operations/") != String::npos)
+        if (object->relative_path.find("/operations/") != String::npos)
             continue;

-        const auto [revision, _] = extractRevisionAndOperationFromKey(object.relative_path);
+        const auto [revision, _] = extractRevisionAndOperationFromKey(object->relative_path);

         /// Filter early if it's possible to get revision from key.
if (revision > restore_information.revision) continue; - keys_names.push_back(object.relative_path); + keys_names.push_back(object->relative_path); } if (!keys_names.empty()) @@ -474,10 +474,10 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject for (const auto & object : objects) { - const auto [revision, operation] = extractRevisionAndOperationFromKey(object.relative_path); + const auto [revision, operation] = extractRevisionAndOperationFromKey(object->relative_path); if (revision == UNKNOWN_REVISION) { - LOG_WARNING(disk->log, "Skip key {} with unknown revision", object.relative_path); + LOG_WARNING(disk->log, "Skip key {} with unknown revision", object->relative_path); continue; } @@ -490,7 +490,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject if (send_metadata) revision_counter = revision - 1; - auto object_attributes = *(source_object_storage->getObjectMetadata(object.relative_path).attributes); + auto object_attributes = *(source_object_storage->getObjectMetadata(object->relative_path).attributes); if (operation == rename) { auto from_path = object_attributes["from_path"]; diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 049935ad60c..7d354e6383d 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -62,6 +62,8 @@ struct RelativePathWithMetadata : relative_path(std::move(relative_path_)) , metadata(std::move(metadata_)) {} + + virtual ~RelativePathWithMetadata() = default; }; struct ObjectKeyWithMetadata @@ -77,7 +79,8 @@ struct ObjectKeyWithMetadata {} }; -using RelativePathsWithMetadata = std::vector; +using RelativePathWithMetadataPtr = std::shared_ptr; +using RelativePathsWithMetadata = std::vector; using ObjectKeysWithMetadata = std::vector; class IObjectStorageIterator; diff --git a/src/Disks/ObjectStorages/IObjectStorage_fwd.h b/src/Disks/ObjectStorages/IObjectStorage_fwd.h index f6ebc883682..67efa4aae2b 100644 --- a/src/Disks/ObjectStorages/IObjectStorage_fwd.h +++ b/src/Disks/ObjectStorages/IObjectStorage_fwd.h @@ -10,4 +10,7 @@ using ObjectStoragePtr = std::shared_ptr; class IMetadataStorage; using MetadataStoragePtr = std::shared_ptr; +class IObjectStorageIterator; +using ObjectStorageIteratorPtr = std::shared_ptr; + } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index b03809f5b39..f07cf23106f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -77,7 +77,7 @@ std::vector MetadataStorageFromPlainObjectStorage::listDirectory(co std::vector result; for (const auto & path_size : files) { - result.push_back(path_size.relative_path); + result.push_back(path_size->relative_path); } std::unordered_set duplicates_filter; diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.cpp b/src/Disks/ObjectStorages/ObjectStorageIterator.cpp index 72ec6e0e500..3d939ce9230 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.cpp @@ -9,7 +9,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -RelativePathWithMetadata ObjectStorageIteratorFromList::current() +RelativePathWithMetadataPtr ObjectStorageIteratorFromList::current() { if (!isValid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); diff --git 
a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h index 841b0ea6664..e934fc2056d 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.h +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h @@ -12,9 +12,9 @@ public: virtual void next() = 0; virtual void nextBatch() = 0; virtual bool isValid() = 0; - virtual RelativePathWithMetadata current() = 0; + virtual RelativePathWithMetadataPtr current() = 0; virtual RelativePathsWithMetadata currentBatch() = 0; - virtual std::optional getCurrrentBatchAndScheduleNext() = 0; + virtual std::optional getCurrentBatchAndScheduleNext() = 0; virtual size_t getAccumulatedSize() const = 0; virtual ~IObjectStorageIterator() = default; @@ -47,22 +47,14 @@ public: return batch_iterator != batch.end(); } - RelativePathWithMetadata current() override; + RelativePathWithMetadataPtr current() override; - RelativePathsWithMetadata currentBatch() override - { - return batch; - } + RelativePathsWithMetadata currentBatch() override { return batch; } - virtual std::optional getCurrrentBatchAndScheduleNext() override - { - return std::nullopt; - } + std::optional getCurrentBatchAndScheduleNext() override { return std::nullopt; } + + size_t getAccumulatedSize() const override { return batch.size(); } - size_t getAccumulatedSize() const override - { - return batch.size(); - } private: RelativePathsWithMetadata batch; RelativePathsWithMetadata::iterator batch_iterator; diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 990e66fc4e5..b7729623a64 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -82,7 +82,7 @@ bool IObjectStorageIteratorAsync::isValid() return current_batch_iterator != current_batch.end(); } -RelativePathWithMetadata IObjectStorageIteratorAsync::current() +RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current() { if (!isValid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); @@ -101,7 +101,7 @@ RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch() return current_batch; } -std::optional IObjectStorageIteratorAsync::getCurrrentBatchAndScheduleNext() +std::optional IObjectStorageIteratorAsync::getCurrentBatchAndScheduleNext() { std::lock_guard lock(mutex); if (!is_initialized) diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index a6abe03bac9..8d155f7ec8d 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -26,10 +26,10 @@ public: void next() override; void nextBatch() override; bool isValid() override; - RelativePathWithMetadata current() override; + RelativePathWithMetadataPtr current() override; RelativePathsWithMetadata currentBatch() override; size_t getAccumulatedSize() const override; - std::optional getCurrrentBatchAndScheduleNext() override; + std::optional getCurrentBatchAndScheduleNext() override; ~IObjectStorageIteratorAsync() override { diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 4cc49288af6..cc138c43c71 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -120,25 +120,22 @@ private: { ProfileEvents::increment(ProfileEvents::S3ListObjects); - bool result = false; auto outcome = 
client->ListObjectsV2(request); + /// Outcome failure will be handled on the caller side. if (outcome.IsSuccess()) { + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + auto objects = outcome.GetResult().GetContents(); - - result = !objects.empty(); - for (const auto & object : objects) - batch.emplace_back( - object.GetKey(), - ObjectMetadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}} - ); + { + ObjectMetadata metadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}; + batch.emplace_back(std::make_shared(object.GetKey(), std::move(metadata))); + } - if (result) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return result; + /// It returns false when all objects were returned + return outcome.GetResult().GetIsTruncated(); } throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", @@ -249,7 +246,6 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (write_settings.s3_allow_parallel_part_upload) scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); - auto blob_storage_log = BlobStorageLogWriter::create(disk_name); if (blob_storage_log) blob_storage_log->local_path = object.local_path; @@ -300,12 +296,12 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet break; for (const auto & object : objects) - children.emplace_back( + children.emplace_back(std::make_shared( object.GetKey(), ObjectMetadata{ static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), - {}}); + {}})); if (max_keys) { diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 83814f42693..cc23f604278 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -4,9 +4,8 @@ #if USE_AZURE_BLOB_STORAGE -#include -#include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9a80553f149..d697d90c8a6 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -52,11 +52,9 @@ #include #include #include -#include #include -#include +#include #include -#include #include #include #include @@ -482,7 +480,7 @@ BlockIO InterpreterSystemQuery::execute() StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageAzureBlob::getSchemaCache(getContext()).clear(); + StorageAzureBlobStorage::getSchemaCache(getContext()).clear(); #endif break; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index e1086ac5833..58672a72563 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 3584f137225..55ff8fefdd5 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -4,8 +4,6 @@ #include #if USE_AWS_S3 && USE_PARQUET -#include -#include #include #include #include @@ -13,10 +11,10 @@ #include #include #include +#include #include #include #include 
-#include namespace fs = std::filesystem; @@ -29,8 +27,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -template -struct DeltaLakeMetadataParser::Impl +struct DeltaLakeMetadataParser::Impl { /** * Useful links: @@ -65,10 +62,13 @@ struct DeltaLakeMetadataParser::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. */ - std::set processMetadataFiles(const Configuration & configuration, ContextPtr context) + std::set processMetadataFiles( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration, + ContextPtr context) { std::set result_files; - const auto checkpoint_version = getCheckpointIfExists(result_files, configuration, context); + const auto checkpoint_version = getCheckpointIfExists(result_files, object_storage, configuration, context); if (checkpoint_version) { @@ -78,10 +78,10 @@ struct DeltaLakeMetadataParser::Impl const auto filename = withPadding(++current_version) + metadata_file_suffix; const auto file_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / filename; - if (!MetadataReadHelper::exists(file_path, configuration)) + if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files, configuration, context); + processMetadataFile(file_path, result_files, object_storage, configuration, context); } LOG_TRACE( @@ -90,16 +90,33 @@ struct DeltaLakeMetadataParser::Impl } else { - const auto keys = MetadataReadHelper::listFiles( - configuration, deltalake_metadata_directory, metadata_file_suffix); - + const auto keys = listFiles(object_storage, configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files, configuration, context); + processMetadataFile(key, result_files, object_storage, configuration, context); } return result_files; } + std::vector listFiles( + const ObjectStoragePtr & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix) + { + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage->listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) + { + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); + } + LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); + return res; + } + /** * Example of content of a single .json metadata file: * " @@ -132,10 +149,12 @@ struct DeltaLakeMetadataParser::Impl void processMetadataFile( const String & key, std::set & result, - const Configuration & configuration, + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration, ContextPtr context) { - auto buf = MetadataReadHelper::createReadBuffer(key, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(key), read_settings); char c; while (!buf->eof()) @@ -180,14 +199,18 @@ struct DeltaLakeMetadataParser::Impl * * We need to get "version", which is the version of the checkpoint we need to read. 
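 *
 * A `_last_checkpoint` file is a single JSON object. A minimal illustrative
 * instance (assuming only the two required fields; real files may also carry
 * optional fields such as "parts") looks like:
 *
 *     {"version": 20, "size": 44}
 *
 * where "version" is the checkpoint version to read and "size" is the number
 * of actions stored in that checkpoint.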
*/ - size_t readLastCheckpointIfExists(const Configuration & configuration, ContextPtr context) + size_t readLastCheckpointIfExists( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration, + ContextPtr context) const { const auto last_checkpoint_file = fs::path(configuration.getPath()) / deltalake_metadata_directory / "_last_checkpoint"; - if (!MetadataReadHelper::exists(last_checkpoint_file, configuration)) + if (!object_storage->exists(StoredObject(last_checkpoint_file))) return 0; String json_str; - auto buf = MetadataReadHelper::createReadBuffer(last_checkpoint_file, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings); readJSONObjectPossiblyInvalid(json_str, *buf); const JSON json(json_str); @@ -237,9 +260,13 @@ struct DeltaLakeMetadataParser::Impl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result, const Configuration & configuration, ContextPtr context) + size_t getCheckpointIfExists( + std::set & result, + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration, + ContextPtr context) { - const auto version = readLastCheckpointIfExists(configuration, context); + const auto version = readLastCheckpointIfExists(object_storage, configuration, context); if (!version) return 0; @@ -248,7 +275,8 @@ struct DeltaLakeMetadataParser::Impl LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); - auto buf = MetadataReadHelper::createReadBuffer(checkpoint_path, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings); auto format_settings = getFormatSettings(context); /// Force nullable, because this parquet file for some reason does not have nullable @@ -317,22 +345,17 @@ struct DeltaLakeMetadataParser::Impl LoggerPtr log = getLogger("DeltaLakeMetadataParser"); }; +DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) {} -template -DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) +Strings DeltaLakeMetadataParser::getFiles( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + ContextPtr context) { -} - -template -Strings DeltaLakeMetadataParser::getFiles(const Configuration & configuration, ContextPtr context) -{ - auto result = impl->processMetadataFiles(configuration, context); + auto result = impl->processMetadataFiles(object_storage, *configuration, context); return Strings(result.begin(), result.end()); } -template DeltaLakeMetadataParser::DeltaLakeMetadataParser(); -template Strings DeltaLakeMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); } #endif diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h index df7276b90b4..f94024597d6 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.h @@ -2,17 +2,21 @@ #include #include +#include +#include namespace DB { -template struct DeltaLakeMetadataParser { public: - DeltaLakeMetadataParser(); + DeltaLakeMetadataParser(); - Strings getFiles(const Configuration & configuration, ContextPtr context); + Strings getFiles( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + 
ContextPtr context);

private:
    struct Impl;
diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp
index 699dfe8fda0..8571c035b32 100644
--- a/src/Storages/DataLakes/HudiMetadataParser.cpp
+++ b/src/Storages/DataLakes/HudiMetadataParser.cpp
@@ -1,16 +1,11 @@
 #include
+#include
 #include
-#include
 #include
 #include
 #include "config.h"
-#include
 #include
-#if USE_AWS_S3
-#include
-#include
-
 namespace DB
 {
@@ -19,98 +14,98 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
 }
-template
-struct HudiMetadataParser::Impl
-{
-    /**
-     * Useful links:
-     * - https://hudi.apache.org/tech-specs/
-     * - https://hudi.apache.org/docs/file_layouts/
-     */
+/**
+ * Useful links:
+ * - https://hudi.apache.org/tech-specs/
+ * - https://hudi.apache.org/docs/file_layouts/
+ */
-    /**
-     * Hudi tables store metadata files and data files.
-     * Metadata files are stored in .hoodie/metadata directory. Though unlike DeltaLake and Iceberg,
-     * metadata is not required in order to understand which files we need to read, moreover,
-     * for Hudi metadata does not always exist.
-     *
-     * There can be two types of data files
-     * 1. base files (columnar file formats like Apache Parquet/Orc)
-     * 2. log files
-     * Currently we support reading only `base files`.
-     * Data file name format:
-     * [File Id]_[File Write Token]_[Transaction timestamp].[File Extension]
-     *
-     * To find needed parts we need to find out latest part file for every file group for every partition.
-     * Explanation why:
-     * Hudi reads in and overwrites the entire table/partition with each update.
-     * Hudi controls the number of file groups under a single partition according to the
-     * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group.
-     * Each file group is identified by File Id.
-     */
-    Strings processMetadataFiles(const Configuration & configuration)
+/**
+ * Hudi tables store metadata files and data files.
+ * Metadata files are stored in the .hoodie/metadata directory. Though unlike DeltaLake and Iceberg,
+ * metadata is not required in order to understand which files we need to read; moreover,
+ * for Hudi, metadata does not always exist.
+ *
+ * There can be two types of data files:
+ * 1. base files (columnar file formats like Apache Parquet/Orc)
+ * 2. log files
+ * Currently we support reading only `base files`.
+ * Data file name format:
+ * [File Id]_[File Write Token]_[Transaction timestamp].[File Extension]
+ *
+ * To find the needed parts, we need to find the latest part file for every file group in every partition.
+ * Explanation why:
+ * Hudi reads in and overwrites the entire table/partition with each update.
+ * Hudi controls the number of file groups under a single partition according to the
+ * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group.
+ * Each file group is identified by File Id.
+ */
+std::vector listFiles(
+    const ObjectStoragePtr & object_storage,
+    const StorageObjectStorageConfiguration & configuration,
+    const String & prefix, const String & suffix)
+{
+    auto key = std::filesystem::path(configuration.getPath()) / prefix;
+    RelativePathsWithMetadata files_with_metadata;
+    object_storage->listObjects(key, files_with_metadata, 0);
+    Strings res;
+    for (const auto & file_with_metadata : files_with_metadata)
     {
-        auto log = getLogger("HudiMetadataParser");
-
-        const auto keys = MetadataReadHelper::listFiles(configuration, "", Poco::toLower(configuration.format));
-
-        using Partition = std::string;
-        using FileID = std::string;
-        struct FileInfo
-        {
-            String key;
-            UInt64 timestamp = 0;
-        };
-        std::unordered_map> data_files;
-
-        for (const auto & key : keys)
-        {
-            auto key_file = std::filesystem::path(key);
-            Strings file_parts;
-            const String stem = key_file.stem();
-            splitInto<'_'>(file_parts, stem);
-            if (file_parts.size() != 3)
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key);
-
-            const auto partition = key_file.parent_path().stem();
-            const auto & file_id = file_parts[0];
-            const auto timestamp = parse(file_parts[2]);
-
-            auto & file_info = data_files[partition][file_id];
-            if (file_info.timestamp == 0 || file_info.timestamp < timestamp)
-            {
-                file_info.key = std::move(key);
-                file_info.timestamp = timestamp;
-            }
-        }
-
-        Strings result;
-        for (auto & [partition, partition_data] : data_files)
-        {
-            LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size());
-            for (auto & [file_id, file_data] : partition_data)
-                result.push_back(std::move(file_data.key));
-        }
-        return result;
+        const auto & filename = file_with_metadata->relative_path;
+        if (filename.ends_with(suffix))
+            res.push_back(filename);
     }
-};
+    LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size());
+    return res;
+}
-
-template
-HudiMetadataParser::HudiMetadataParser() : impl(std::make_unique())
+Strings HudiMetadataParser::getFiles(
+    ObjectStoragePtr object_storage,
+    StorageObjectStorageConfigurationPtr configuration,
+    ContextPtr)
 {
+    auto log = getLogger("HudiMetadataParser");
+
+    const auto keys = listFiles(object_storage, *configuration, "", Poco::toLower(configuration->format));
+
+    using Partition = std::string;
+    using FileID = std::string;
+    struct FileInfo
+    {
+        String key;
+        UInt64 timestamp = 0;
+    };
+    std::unordered_map> data_files;
+
+    for (const auto & key : keys)
+    {
+        auto key_file = std::filesystem::path(key);
+        Strings file_parts;
+        const String stem = key_file.stem();
+        splitInto<'_'>(file_parts, stem);
+        if (file_parts.size() != 3)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key);
+
+        const auto partition = key_file.parent_path().stem();
+        const auto & file_id = file_parts[0];
+        const auto timestamp = parse(file_parts[2]);
+
+        auto & file_info = data_files[partition][file_id];
+        if (file_info.timestamp == 0 || file_info.timestamp < timestamp)
+        {
+            file_info.key = key;
+            file_info.timestamp = timestamp;
+        }
+    }
+
+    Strings result;
+    for (auto & [partition, partition_data] : data_files)
+    {
+        LOG_TRACE(log, "Adding {} data files from partition {}", partition_data.size(), partition);
+        for (auto & [file_id, file_data] : partition_data)
+            result.push_back(std::move(file_data.key));
+    }
+    return result;
 }
-template
-Strings HudiMetadataParser::getFiles(const Configuration & configuration, ContextPtr)
-{
-    return impl->processMetadataFiles(configuration);
 }
-
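
// A minimal standalone sketch (with file names invented for this example) of
// the selection rule getFiles() implements above: parse
// [File Id]_[File Write Token]_[Transaction timestamp].[File Extension]
// and keep only the newest transaction timestamp per (partition, file id).
#include <cstdint>
#include <filesystem>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

int main()
{
    /// Invented example keys: partition "us", two commits for file group
    /// "fileid0" and one for "fileid1".
    const std::vector<std::string> keys = {
        "us/fileid0_token-0_20240101093000.parquet",
        "us/fileid0_token-1_20240102093000.parquet", /// newer commit, must win
        "us/fileid1_token-0_20240101093000.parquet",
    };

    /// partition -> file id -> (latest timestamp, key holding it)
    std::map<std::string, std::map<std::string, std::pair<uint64_t, std::string>>> latest;

    for (const auto & key : keys)
    {
        const std::filesystem::path key_file(key);
        const std::string stem = key_file.stem().string(); /// drops ".parquet"

        /// Split the stem on '_' into [file id, write token, timestamp].
        std::vector<std::string> parts;
        std::istringstream in(stem);
        for (std::string part; std::getline(in, part, '_');)
            parts.push_back(part);
        if (parts.size() != 3)
            continue; /// unexpected name; the parser above throws instead

        const std::string partition = key_file.parent_path().string();
        const uint64_t timestamp = std::stoull(parts[2]);

        auto & best = latest[partition][parts[0]];
        if (best.first < timestamp)
            best = {timestamp, key};
    }

    for (const auto & [partition, groups] : latest)
        for (const auto & [file_id, best] : groups)
            std::cout << partition << " / " << file_id << " -> " << best.second << '\n';
}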
-template HudiMetadataParser::HudiMetadataParser(); -template Strings HudiMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); - -} - -#endif diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h index 6727ba2f718..2fc004595ca 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ b/src/Storages/DataLakes/HudiMetadataParser.h @@ -1,22 +1,17 @@ #pragma once #include -#include +#include +#include namespace DB { -template struct HudiMetadataParser { -public: - HudiMetadataParser(); - - Strings getFiles(const Configuration & configuration, ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; + Strings getFiles( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, ContextPtr context); }; } diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index db3f835494f..934bf227c42 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -8,127 +8,91 @@ #include #include #include -#include +#include +#include namespace DB { -template -class IStorageDataLake : public Storage +template +class IStorageDataLake : public StorageObjectStorage { public: static constexpr auto name = Name::name; - using Configuration = typename Storage::Configuration; - template - explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) - : Storage(getConfigurationForDataRead(configuration_, context_, {}, attach), context_, std::forward(args)...) - , base_configuration(configuration_) - , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) + using Storage = StorageObjectStorage; + using ConfigurationPtr = Storage::ConfigurationPtr; - template - static StoragePtr create(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) + static StoragePtr create( + ConfigurationPtr base_configuration, + ContextPtr context, + const String & engine_name_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment_, + std::optional format_settings_, + bool /* attach */) { - return std::make_shared>(configuration_, context_, attach, std::forward(args)...); + auto object_storage = base_configuration->createOrUpdateObjectStorage(context); + + auto configuration = base_configuration->clone(); + configuration->getPaths() = MetadataParser().getFiles(object_storage, configuration, context); + + return std::make_shared>( + base_configuration, configuration, object_storage, engine_name_, context, + table_id_, columns_, constraints_, comment_, format_settings_); } String getName() const override { return name; } static ColumnsDescription getTableStructureFromData( - Configuration & base_configuration, - const std::optional & format_settings, + ObjectStoragePtr object_storage_, + ConfigurationPtr base_configuration, + const std::optional &, ContextPtr local_context) { - auto configuration = getConfigurationForDataRead(base_configuration, local_context); - return Storage::getTableStructureFromData(configuration, format_settings, local_context); + auto metadata = parseIcebergMetadata(object_storage_, base_configuration, local_context); + return ColumnsDescription(metadata->getTableSchema()); } - static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) + std::pair 
updateConfigurationAndGetCopy(ContextPtr local_context) override { - return Storage::getConfiguration(engine_args, local_context, /* get_format_from_file */false); + std::lock_guard lock(Storage::configuration_update_mutex); + + auto new_object_storage = base_configuration->createOrUpdateObjectStorage(local_context); + bool updated = new_object_storage != nullptr; + if (updated) + Storage::object_storage = new_object_storage; + + auto new_keys = MetadataParser().getFiles(Storage::object_storage, base_configuration, local_context); + + if (updated || new_keys != Storage::configuration->getPaths()) + { + auto updated_configuration = base_configuration->clone(); + /// If metadata wasn't changed, we won't list data files again. + updated_configuration->getPaths() = new_keys; + Storage::configuration = updated_configuration; + } + return {Storage::configuration, Storage::object_storage}; } - Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + template + explicit IStorageDataLake( + ConfigurationPtr base_configuration_, + Args &&... args) + : Storage(std::forward(args)...) + , base_configuration(base_configuration_) { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - return Storage::getConfiguration(); - } - - void updateConfiguration(ContextPtr local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); } private: - static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, ContextPtr local_context, const Strings & keys = {}, bool attach = false) - { - auto configuration{base_configuration}; - configuration.update(local_context); - configuration.static_configuration = true; - - try - { - if (keys.empty()) - configuration.keys = getDataFiles(configuration, local_context); - else - configuration.keys = keys; - - LOG_TRACE( - getLogger("DataLake"), - "New configuration path: {}, keys: {}", - configuration.getPath(), fmt::join(configuration.keys, ", ")); - - configuration.connect(local_context); - return configuration; - } - catch (...) - { - if (!attach) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - return configuration; - } - } - - static Strings getDataFiles(const Configuration & configuration, ContextPtr local_context) - { - return MetadataParser().getFiles(configuration, local_context); - } - - void updateConfigurationImpl(ContextPtr local_context) - { - const bool updated = base_configuration.update(local_context); - auto new_keys = getDataFiles(base_configuration, local_context); - - if (!updated && new_keys == Storage::getConfiguration().keys) - return; - - Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys)); - } - - Configuration base_configuration; - std::mutex configuration_update_mutex; + ConfigurationPtr base_configuration; LoggerPtr log; }; -template -static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) -{ - auto configuration = DataLake::getConfiguration(args.engine_args, args.getLocalContext()); - - /// Data lakes use parquet format, no need for schema inference. 
- if (configuration.format == "auto") - configuration.format = "Parquet"; - - return DataLake::create(configuration, args.getContext(), args.attach, args.table_id, args.columns, args.constraints, - args.comment, getFormatSettings(args.getContext())); -} - } #endif diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp index df1536f53fc..08cebb3f396 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp @@ -21,11 +21,11 @@ #include #include #include +#include #include #include #include -#include -#include +#include #include #include @@ -44,7 +44,8 @@ namespace ErrorCodes } IcebergMetadata::IcebergMetadata( - const StorageS3::Configuration & configuration_, + ObjectStoragePtr object_storage_, + StorageObjectStorageConfigurationPtr configuration_, DB::ContextPtr context_, Int32 metadata_version_, Int32 format_version_, @@ -52,6 +53,7 @@ IcebergMetadata::IcebergMetadata( Int32 current_schema_id_, DB::NamesAndTypesList schema_) : WithContext(context_) + , object_storage(object_storage_) , configuration(configuration_) , metadata_version(metadata_version_) , format_version(format_version_) @@ -331,21 +333,42 @@ MutableColumns parseAvro( return columns; } +std::vector listFiles( + const ObjectStoragePtr & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix) +{ + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage->listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) + { + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); + } + LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); + return res; +} + /** * Each version of table metadata is stored in a `metadata` directory and * has one of 2 formats: * 1) v.metadata.json, where V - metadata version. 
* 2) -.metadata.json, where V - metadata version */ -std::pair getMetadataFileAndVersion(const StorageS3::Configuration & configuration) +std::pair getMetadataFileAndVersion( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration) { - const auto metadata_files = S3DataLakeMetadataReadHelper::listFiles(configuration, "metadata", ".metadata.json"); + const auto metadata_files = listFiles(object_storage, configuration, "metadata", ".metadata.json"); if (metadata_files.empty()) { throw Exception( ErrorCodes::FILE_DOESNT_EXIST, "The metadata file for Iceberg table with path {} doesn't exist", - configuration.url.key); + configuration.getPath()); } std::vector> metadata_files_with_versions; @@ -372,11 +395,15 @@ std::pair getMetadataFileAndVersion(const StorageS3::Configuratio } -std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context_) +std::unique_ptr parseIcebergMetadata( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + ContextPtr context_) { - const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(configuration); + const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration); LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path); - auto buf = S3DataLakeMetadataReadHelper::createReadBuffer(metadata_file_path, context_, configuration); + auto read_settings = context_->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); String json_str; readJSONObjectPossiblyInvalid(json_str, *buf); @@ -397,12 +424,12 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura if (snapshot->getValue("snapshot-id") == current_snapshot_id) { const auto path = snapshot->getValue("manifest-list"); - manifest_list_file = std::filesystem::path(configuration.url.key) / "metadata" / std::filesystem::path(path).filename(); + manifest_list_file = std::filesystem::path(configuration->getPath()) / "metadata" / std::filesystem::path(path).filename(); break; } } - return std::make_unique(configuration, context_, metadata_version, format_version, manifest_list_file, schema_id, schema); + return std::make_unique(object_storage, configuration, context_, metadata_version, format_version, manifest_list_file, schema_id, schema); } /** @@ -441,12 +468,14 @@ Strings IcebergMetadata::getDataFiles() LOG_TEST(log, "Collect manifest files from manifest list {}", manifest_list_file); - auto manifest_list_buf = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_list_file, getContext(), configuration); + auto context = getContext(); + auto read_settings = context->getReadSettings(); + auto manifest_list_buf = object_storage->readObject(StoredObject(manifest_list_file), read_settings); auto manifest_list_file_reader = std::make_unique(std::make_unique(*manifest_list_buf)); auto data_type = AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0)); Block header{{data_type->createColumn(), data_type, "manifest_path"}}; - auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(getContext())); + auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(context)); auto & col = columns.at(0); if (col->getDataType() != TypeIndex::String) @@ -462,7 +491,7 @@ Strings IcebergMetadata::getDataFiles() { const auto file_path = col_str->getDataAt(i).toView(); const auto filename = 
std::filesystem::path(file_path).filename(); - manifest_files.emplace_back(std::filesystem::path(configuration.url.key) / "metadata" / filename); + manifest_files.emplace_back(std::filesystem::path(configuration->getPath()) / "metadata" / filename); } NameSet files; @@ -471,7 +500,7 @@ Strings IcebergMetadata::getDataFiles() { LOG_TEST(log, "Process manifest file {}", manifest_file); - auto buffer = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_file, getContext(), configuration); + auto buffer = object_storage->readObject(StoredObject(manifest_file), read_settings); auto manifest_file_reader = std::make_unique(std::make_unique(*buffer)); /// Manifest file should always have table schema in avro file metadata. By now we don't support tables with evolved schema, @@ -482,7 +511,7 @@ Strings IcebergMetadata::getDataFiles() Poco::JSON::Parser parser; Poco::Dynamic::Var json = parser.parse(schema_json_string); Poco::JSON::Object::Ptr schema_object = json.extract(); - if (!getContext()->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) + if (!context->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not " @@ -595,9 +624,9 @@ Strings IcebergMetadata::getDataFiles() const auto status = status_int_column->getInt(i); const auto data_path = std::string(file_path_string_column->getDataAt(i).toView()); - const auto pos = data_path.find(configuration.url.key); + const auto pos = data_path.find(configuration->getPath()); if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration.url.key, data_path); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration->getPath(), data_path); const auto file_path = data_path.substr(pos); diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h index 3e6a2ec3415..92946e4192b 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h @@ -2,9 +2,10 @@ #if USE_AWS_S3 && USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. -#include #include #include +#include +#include namespace DB { @@ -59,13 +60,15 @@ namespace DB class IcebergMetadata : WithContext { public: - IcebergMetadata(const StorageS3::Configuration & configuration_, - ContextPtr context_, - Int32 metadata_version_, - Int32 format_version_, - String manifest_list_file_, - Int32 current_schema_id_, - NamesAndTypesList schema_); + IcebergMetadata( + ObjectStoragePtr object_storage_, + StorageObjectStorageConfigurationPtr configuration_, + ContextPtr context_, + Int32 metadata_version_, + Int32 format_version_, + String manifest_list_file_, + Int32 current_schema_id_, + NamesAndTypesList schema_); /// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files. 
/// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file) @@ -77,7 +80,8 @@ public: size_t getVersion() const { return metadata_version; } private: - const StorageS3::Configuration configuration; + ObjectStoragePtr object_storage; + StorageObjectStorageConfigurationPtr configuration; Int32 metadata_version; Int32 format_version; String manifest_list_file; @@ -88,7 +92,10 @@ private: }; -std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context); +std::unique_ptr parseIcebergMetadata( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + ContextPtr context); } diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp index 8a1a2cdbd8f..ad1a27c312b 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp @@ -5,85 +5,6 @@ namespace DB { -StoragePtr StorageIceberg::create( - const DB::StorageIceberg::Configuration & base_configuration, - DB::ContextPtr context_, - bool attach, - const DB::StorageID & table_id_, - const DB::ColumnsDescription & columns_, - const DB::ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_) -{ - auto configuration{base_configuration}; - configuration.update(context_); - std::unique_ptr metadata; - NamesAndTypesList schema_from_metadata; - try - { - metadata = parseIcebergMetadata(configuration, context_); - schema_from_metadata = metadata->getTableSchema(); - configuration.keys = metadata->getDataFiles(); - } - catch (...) - { - if (!attach) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - return std::make_shared( - std::move(metadata), - configuration, - context_, - table_id_, - columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, - constraints_, - comment, - format_settings_); -} - -StorageIceberg::StorageIceberg( - std::unique_ptr metadata_, - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_) - : StorageS3(configuration_, context_, table_id_, columns_, constraints_, comment, format_settings_) - , current_metadata(std::move(metadata_)) - , base_configuration(configuration_) -{ -} - -ColumnsDescription StorageIceberg::getTableStructureFromData( - Configuration & base_configuration, - const std::optional &, - ContextPtr local_context) -{ - auto configuration{base_configuration}; - configuration.update(local_context); - auto metadata = parseIcebergMetadata(configuration, local_context); - return ColumnsDescription(metadata->getTableSchema()); -} - -void StorageIceberg::updateConfigurationImpl(ContextPtr local_context) -{ - const bool updated = base_configuration.update(local_context); - auto new_metadata = parseIcebergMetadata(base_configuration, local_context); - - if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) - current_metadata = std::move(new_metadata); - else if (!updated) - return; - - auto updated_configuration{base_configuration}; - /// If metadata wasn't changed, we won't list data files again. 
- updated_configuration.keys = current_metadata->getDataFiles(); - StorageS3::useConfiguration(updated_configuration); -} } diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h index 4e63da5508a..bca6e3c868f 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -4,13 +4,13 @@ #if USE_AWS_S3 && USE_AVRO -# include -# include -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -21,65 +21,100 @@ namespace DB /// many Iceberg features like schema evolution, partitioning, positional and equality deletes. /// TODO: Implement Iceberg as a separate storage using IObjectStorage /// (to support all object storages, not only S3) and add support for missing Iceberg features. -class StorageIceberg : public StorageS3 +template +class StorageIceberg : public StorageObjectStorage { public: static constexpr auto name = "Iceberg"; + using Storage = StorageObjectStorage; + using ConfigurationPtr = Storage::ConfigurationPtr; - using Configuration = StorageS3::Configuration; - - static StoragePtr create(const Configuration & base_configuration, - ContextPtr context_, - bool attach, + static StoragePtr create( + ConfigurationPtr base_configuration, + ContextPtr context, + const String & engine_name_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_); + const String & comment_, + std::optional format_settings_, + bool attach) + { + auto object_storage = base_configuration->createOrUpdateObjectStorage(context); + std::unique_ptr metadata; + NamesAndTypesList schema_from_metadata; + try + { + metadata = parseIcebergMetadata(object_storage, base_configuration, context); + schema_from_metadata = metadata->getTableSchema(); + } + catch (...) + { + if (!attach) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); + } - StorageIceberg( - std::unique_ptr metadata_, - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_); + auto configuration = base_configuration->clone(); + configuration->getPaths() = metadata->getDataFiles(); + + return std::make_shared>( + base_configuration, std::move(metadata), configuration, object_storage, engine_name_, context, + table_id_, + columns_.empty() ? 
ColumnsDescription(schema_from_metadata) : columns_, + constraints_, comment_, format_settings_); + } String getName() const override { return name; } static ColumnsDescription getTableStructureFromData( - Configuration & base_configuration, + ObjectStoragePtr object_storage_, + ConfigurationPtr base_configuration, const std::optional &, - ContextPtr local_context); - - static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) + ContextPtr local_context) { - return StorageS3::getConfiguration(engine_args, local_context, /* get_format_from_file */false); + auto metadata = parseIcebergMetadata(object_storage_, base_configuration, local_context); + return ColumnsDescription(metadata->getTableSchema()); } - Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + std::pair updateConfigurationAndGetCopy(ContextPtr local_context) override { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - return StorageS3::getConfiguration(); + std::lock_guard lock(Storage::configuration_update_mutex); + + auto new_object_storage = base_configuration->createOrUpdateObjectStorage(local_context); + bool updated = new_object_storage != nullptr; + if (updated) + Storage::object_storage = new_object_storage; + + auto new_metadata = parseIcebergMetadata(Storage::object_storage, base_configuration, local_context); + + if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) + current_metadata = std::move(new_metadata); + else if (updated) + { + auto updated_configuration = base_configuration->clone(); + /// If metadata wasn't changed, we won't list data files again. + updated_configuration->getPaths() = current_metadata->getDataFiles(); + Storage::configuration = updated_configuration; + } + return {Storage::configuration, Storage::object_storage}; } - void updateConfiguration(ContextPtr local_context) override + template + StorageIceberg( + ConfigurationPtr base_configuration_, + std::unique_ptr metadata_, + Args &&... args) + : Storage(std::forward(args)...) 
+ , base_configuration(base_configuration_) + , current_metadata(std::move(metadata_)) { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); } private: - void updateConfigurationImpl(ContextPtr local_context); - + ConfigurationPtr base_configuration; std::unique_ptr current_metadata; - Configuration base_configuration; - std::mutex configuration_update_mutex; }; - } #endif diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp deleted file mode 100644 index d66e21550a3..00000000000 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int S3_ERROR; -} - -std::shared_ptr -S3DataLakeMetadataReadHelper::createReadBuffer(const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration) -{ - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries; - return std::make_shared( - base_configuration.client, - base_configuration.url.bucket, - key, - base_configuration.url.version_id, - request_settings, - context->getReadSettings()); -} - -bool S3DataLakeMetadataReadHelper::exists(const String & key, const StorageS3::Configuration & configuration) -{ - return S3::objectExists(*configuration.client, configuration.url.bucket, key); -} - -std::vector S3DataLakeMetadataReadHelper::listFiles( - const StorageS3::Configuration & base_configuration, const String & prefix, const String & suffix) -{ - const auto & table_path = base_configuration.url.key; - const auto & bucket = base_configuration.url.bucket; - const auto & client = base_configuration.client; - - std::vector res; - S3::ListObjectsV2Request request; - Aws::S3::Model::ListObjectsV2Outcome outcome; - - request.SetBucket(bucket); - request.SetPrefix(std::filesystem::path(table_path) / prefix); - - bool is_finished{false}; - while (!is_finished) - { - outcome = client->ListObjectsV2(request); - if (!outcome.IsSuccess()) - throw S3Exception( - outcome.GetError().GetErrorType(), - "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", - quoteString(bucket), - quoteString(base_configuration.url.key), - backQuote(outcome.GetError().GetExceptionName()), - quoteString(outcome.GetError().GetMessage())); - - const auto & result_batch = outcome.GetResult().GetContents(); - for (const auto & obj : result_batch) - { - const auto & filename = obj.GetKey(); - if (filename.ends_with(suffix)) - res.push_back(filename); - } - - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - is_finished = !outcome.GetResult().GetIsTruncated(); - } - - LOG_TRACE(getLogger("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - - return res; -} - -} -#endif diff --git a/src/Storages/DataLakes/S3MetadataReader.h b/src/Storages/DataLakes/S3MetadataReader.h deleted file mode 100644 index cae7dd1fa3d..00000000000 --- a/src/Storages/DataLakes/S3MetadataReader.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include - -#if USE_AWS_S3 - -#include - -class ReadBuffer; - -namespace DB -{ - -struct S3DataLakeMetadataReadHelper -{ - static std::shared_ptr createReadBuffer( - const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration); - - static bool exists(const String & key, const 
StorageS3::Configuration & configuration); - - static std::vector listFiles(const StorageS3::Configuration & configuration, const std::string & prefix = "", const std::string & suffix = ""); -}; -} - -#endif diff --git a/src/Storages/DataLakes/StorageDeltaLake.h b/src/Storages/DataLakes/StorageDeltaLake.h index 8b4ba28d6f7..07c2205d2df 100644 --- a/src/Storages/DataLakes/StorageDeltaLake.h +++ b/src/Storages/DataLakes/StorageDeltaLake.h @@ -5,11 +5,6 @@ #include #include "config.h" -#if USE_AWS_S3 -#include -#include -#endif - namespace DB { @@ -19,7 +14,7 @@ struct StorageDeltaLakeName }; #if USE_AWS_S3 && USE_PARQUET -using StorageDeltaLakeS3 = IStorageDataLake>; +using StorageDeltaLakeS3 = IStorageDataLake; #endif } diff --git a/src/Storages/DataLakes/StorageHudi.h b/src/Storages/DataLakes/StorageHudi.h index 84666f51405..3fd52c82d32 100644 --- a/src/Storages/DataLakes/StorageHudi.h +++ b/src/Storages/DataLakes/StorageHudi.h @@ -5,11 +5,6 @@ #include #include "config.h" -#if USE_AWS_S3 -#include -#include -#endif - namespace DB { @@ -19,7 +14,7 @@ struct StorageHudiName }; #if USE_AWS_S3 -using StorageHudiS3 = IStorageDataLake>; +using StorageHudiS3 = IStorageDataLake; #endif } diff --git a/src/Storages/DataLakes/registerDataLakes.cpp b/src/Storages/DataLakes/registerDataLakes.cpp index 118600f7212..2647fbce39d 100644 --- a/src/Storages/DataLakes/registerDataLakes.cpp +++ b/src/Storages/DataLakes/registerDataLakes.cpp @@ -6,43 +6,43 @@ #include #include #include +#include namespace DB { -#define REGISTER_DATA_LAKE_STORAGE(STORAGE, NAME) \ - factory.registerStorage( \ - NAME, \ - [](const StorageFactory::Arguments & args) \ - { \ - return createDataLakeStorage(args);\ - }, \ - { \ - .supports_settings = false, \ - .supports_schema_inference = true, \ - .source_access_type = AccessType::S3, \ - }); - #if USE_PARQUET -void registerStorageDeltaLake(StorageFactory & factory) +void registerStorageDeltaLake(StorageFactory & ) { - REGISTER_DATA_LAKE_STORAGE(StorageDeltaLakeS3, StorageDeltaLakeName::name) + // factory.registerStorage( + // StorageDeltaLakeName::name, + // [&](const StorageFactory::Arguments & args) + // { + // auto configuration = std::make_shared(); + // return IStorageDataLake::create( + // configuration, args.getContext(), "deltaLake", args.table_id, args.columns, + // args.constraints, args.comment, std::nullopt, args.attach); + // }, + // { + // .supports_settings = false, + // .supports_schema_inference = true, + // .source_access_type = AccessType::S3, + // }); } #endif #if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. 
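// The registrations in this file are stubbed out at this point of the patch
// series. A sketch of what re-enabling the Iceberg one might look like,
// mirroring the commented-out DeltaLake block above (the template arguments
// and the concrete configuration type are elided in this patch, so they are
// left elided here as well):
//
//     factory.registerStorage(
//         StorageIceberg::name,
//         [&](const StorageFactory::Arguments & args)
//         {
//             auto configuration = std::make_shared();
//             return StorageIceberg::create(
//                 configuration, args.getContext(), "Iceberg", args.table_id, args.columns,
//                 args.constraints, args.comment, std::nullopt, args.attach);
//         },
//         {
//             .supports_settings = false,
//             .supports_schema_inference = true,
//             .source_access_type = AccessType::S3,
//         });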
-void registerStorageIceberg(StorageFactory & factory) +void registerStorageIceberg(StorageFactory &) { - REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) + // REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) } #endif -void registerStorageHudi(StorageFactory & factory) +void registerStorageHudi(StorageFactory &) { - REGISTER_DATA_LAKE_STORAGE(StorageHudiS3, StorageHudiName::name) } } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index ab21c4946e4..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1117 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. - */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. 
- throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. - std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - ContextPtr context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - auto columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - storage_metadata.setColumns(columns); - } - else - { - /// We don't allow special columns in HDFS storage. 
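/// For example, hypothetical definitions such as `b UInt64 MATERIALIZED a * 2` or
/// `c String ALIAS upper(name)` would be rejected by the check below.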
- if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - const String & format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(format_) - , compression_method(compression_method_) - { - } - - std::pair, std::optional> next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. " - "You must specify table structure manually", format); - return {nullptr, std::nullopt}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - 
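/// Note the split between the two cache write paths: in UNION schema inference mode
/// setSchemaToLastFile() above stores a schema per file, while in DEFAULT mode
/// setResultingSchema() below stores a single resulting schema for all read paths at once.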
void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(getContext()); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - auto cache_key = getKeyForSchemaCache(url, format, {}, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - const String & format; - const String & compression_method; - size_t current_index = 0; - }; -} - -ColumnsDescription StorageHDFS::getTableStructureFromData( - const String & format, - const String & uri, - const String & compression_method, - ContextPtr ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path." 
- " You must specify table structure manually", format); - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths_with_info.size() > 1, ctx); -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. 
- while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - ContextPtr context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; - StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? 
std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - ContextPtr context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
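/// i.e. drop the writer without flushing it and finalize the underlying buffer,
/// then rethrow so the caller still observes the original error.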
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; - - ReadFromHDFS( - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , context(std::move(context_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo info; - const bool need_only_count; - std::shared_ptr storage; - - ContextPtr context; - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters() -{ - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - context_, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const 
ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->virtual_columns, context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->virtual_columns, context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.back(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context_->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - bool truncate_on_insert = context_->getSettingsRef().hdfs_truncate_on_insert; - if (!truncate_on_insert && !hdfsExists(fs.get(), path_from_uri.c_str())) - { - if (context_->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uris[0].find_first_of('.', uris[0].find_last_of('/')); - size_t index = uris.size(); - String new_uri; - do - { - new_uri = uris[0].substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? "" : uris[0].substr(pos)); - ++index; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - uris.push_back(new_uri); - current_uri = new_uri; - } - else - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. 
If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url, true); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -NamesAndTypesList StorageHDFS::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageHDFS::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h deleted file mode 100644 index 7170763c959..00000000000 --- a/src/Storages/HDFS/StorageHDFS.h +++ /dev/null @@ -1,179 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include 
-#include -#include -#include -#include - -namespace DB -{ - -class IInputFormat; - -/** - * This class represents a table engine for external HDFS files. - * Read method is supported for now. - */ -class StorageHDFS final : public IStorage, WithContext -{ -public: - struct PathInfo - { - time_t last_mod_time; - size_t size; - }; - - struct PathWithInfo - { - PathWithInfo() = default; - PathWithInfo(const String & path_, const std::optional & info_) : path(path_), info(info_) {} - String path; - std::optional info; - }; - - StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - ContextPtr context_, - const String & compression_method_ = "", - bool distributed_processing_ = false, - ASTPtr partition_by = nullptr); - - String getName() const override { return "HDFS"; } - - void read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert) override; - - void truncate( - const ASTPtr & query, - const StorageMetadataPtr & metadata_snapshot, - ContextPtr local_context, - TableExclusiveLockHolder &) override; - - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - - bool supportsPartitionBy() const override { return true; } - - /// Check if the format is column-oriented. - /// It is useful because column-oriented formats can effectively skip unknown columns, - /// so we can create a header of only the required columns in the read method and ask - /// the format to read only them. Note: this hack cannot be done with ordinary formats like TSV.
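/// E.g. for `SELECT c1 FROM hdfs_table` over Parquet only the `c1` column chunks need to
/// be decoded, while TSV still has to be parsed row by row in full.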
- bool supportsSubsetOfColumns(const ContextPtr & context_) const; - - bool supportsSubcolumns() const override { return true; } - - static ColumnsDescription getTableStructureFromData( - const String & format, - const String & uri, - const String & compression_method, - ContextPtr ctx); - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - bool supportsTrivialCountOptimization() const override { return true; } - -protected: - friend class HDFSSource; - friend class ReadFromHDFS; - -private: - std::vector uris; - String format_name; - String compression_method; - const bool distributed_processing; - ASTPtr partition_by; - bool is_path_with_globs; - NamesAndTypesList virtual_columns; - - LoggerPtr log = getLogger("StorageHDFS"); -}; - -class PullingPipelineExecutor; - -class HDFSSource : public ISource, WithContext -{ -public: - class DisclosedGlobIterator - { - public: - DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context); - StorageHDFS::PathWithInfo next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class URISIterator - { - public: - URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context); - StorageHDFS::PathWithInfo next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - using IteratorWrapper = std::function; - using StorageHDFSPtr = std::shared_ptr; - - HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - ContextPtr context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_); - - String getName() const override; - - Chunk generate() override; - -private: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info); - - StorageHDFSPtr storage; - Block block_for_format; - NamesAndTypesList requested_columns; - NamesAndTypesList requested_virtual_columns; - UInt64 max_block_size; - std::shared_ptr file_iterator; - ColumnsDescription columns_description; - bool need_only_count; - size_t total_rows_in_file = 0; - - std::unique_ptr read_buf; - std::shared_ptr input_format; - std::unique_ptr pipeline; - std::unique_ptr reader; - String current_path; - std::optional current_file_size; - - /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
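/// initialize() returns false once the file iterator is exhausted, which ends generate().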
- bool initialize(); -}; -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp deleted file mode 100644 index fad29436102..00000000000 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "config.h" -#include "Interpreters/Context_fwd.h" - -#if USE_HDFS - -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageHDFSCluster::StorageHDFSCluster( - ContextPtr context_, - const String & cluster_name_, - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & compression_method_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) - , uri(uri_) - , format_name(format_name_) - , compression_method(compression_method_) -{ - checkHDFSURL(uri_); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); - storage_metadata.setColumns(columns); - } - else - storage_metadata.setColumns(columns_); - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query)); - - TableFunctionHDFSCluster::addColumnsStructureToArguments(expression_list->children, structure, context); -} - - -RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared(uri, predicate, virtual_columns, context); - auto callback = std::make_shared>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; }); - return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; -} - -NamesAndTypesList StorageHDFSCluster::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; -} - -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h deleted file mode 100644 index 7c4c41a573a..00000000000 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include - -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageHDFSCluster : public IStorageCluster -{ -public: - StorageHDFSCluster( - ContextPtr context_, - const String & cluster_name_, - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription 
& columns_, - const ConstraintsDescription & constraints_, - const String & compression_method_, - bool structure_argument_was_provided_); - - std::string getName() const override { return "HDFSCluster"; } - - NamesAndTypesList getVirtuals() const override; - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization() const override { return true; } - -private: - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; - - String uri; - String format_name; - String compression_method; - NamesAndTypesList virtual_columns; -}; - - -} - -#endif diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4fa6bfdd617..26301472f24 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -98,9 +98,14 @@ class IStorage : public std::enable_shared_from_this, public TypePromo public: IStorage() = delete; /// Storage metadata can be set separately in setInMemoryMetadata method - explicit IStorage(StorageID storage_id_) + explicit IStorage(StorageID storage_id_, std::unique_ptr metadata_ = nullptr) : storage_id(std::move(storage_id_)) - , metadata(std::make_unique()) {} + { + if (metadata_) + metadata.set(std::move(metadata_)); + else + metadata.set(std::make_unique()); + } IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; diff --git a/src/Storages/ObjectStorage/AzureConfiguration.cpp b/src/Storages/ObjectStorage/AzureConfiguration.cpp new file mode 100644 index 00000000000..ba3e796223a --- /dev/null +++ b/src/Storages/ObjectStorage/AzureConfiguration.cpp @@ -0,0 +1,451 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +const std::unordered_set required_configuration_keys = { + "blob_path", + "container", +}; + +const std::unordered_set optional_configuration_keys = { + "format", + "compression", + "structure", + "compression_method", + "account_name", + "account_key", + "connection_string", + "storage_account_url", +}; + +using AzureClient = Azure::Storage::Blobs::BlobContainerClient; +using AzureClientPtr = std::unique_ptr; + +namespace +{ + bool isConnectionString(const std::string & candidate) + { + return !candidate.starts_with("http"); + } + + bool containerExists(std::unique_ptr & blob_service_client, std::string container_name) + { + Azure::Storage::Blobs::ListBlobContainersOptions options; + options.Prefix = container_name; + options.PageSizeHint = 1; + + auto containers_list_response = blob_service_client->ListBlobContainers(options); + auto containers_list = containers_list_response.BlobContainers; + + for (const auto & container : containers_list) + { + if (container_name == container.Name) + return true; + } + return false; + } +} + +void StorageAzureBlobConfiguration::check(ContextPtr context) const +{ + Poco::URI url_to_check; + if (is_connection_string) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); + url_to_check = Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); + } + else + url_to_check = Poco::URI(connection_url); + + context->getGlobalContext()->getRemoteHostFilter().checkURL(url_to_check); +} + 
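/// isConnectionString() above keys off the "http" prefix alone; the two accepted forms of
/// the first engine argument look roughly like this (made-up account name and key):
///   connection string:   "DefaultEndpointsProtocol=https;AccountName=acc;AccountKey=...;EndpointSuffix=core.windows.net"
///   storage account URL: "https://acc.blob.core.windows.net"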
+StorageObjectStorageConfigurationPtr StorageAzureBlobConfiguration::clone() +{ + auto configuration = std::make_shared(); + configuration->connection_url = connection_url; + configuration->is_connection_string = is_connection_string; + configuration->account_name = account_name; + configuration->account_key = account_key; + configuration->container = container; + configuration->blob_path = blob_path; + configuration->blobs_paths = blobs_paths; + return configuration; +} + +AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) +{ + const auto & context_settings = context->getSettingsRef(); + auto settings_ptr = std::make_unique(); + settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; + settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; + settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); + return settings_ptr; +} + +ObjectStoragePtr StorageAzureBlobConfiguration::createOrUpdateObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +{ + auto client = createClient(is_readonly); + auto settings = createSettings(context); + return std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), container); +} + +AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) +{ + using namespace Azure::Storage::Blobs; + + AzureClientPtr result; + + if (is_connection_string) + { + auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); + result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); + bool container_exists = containerExists(blob_service_client, container); + + if (!container_exists) + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); + + try + { + result->CreateIfNotExists(); + } catch (const Azure::Storage::StorageException & e) + { + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) + { + throw; + } + } + } + } + else + { + std::shared_ptr storage_shared_key_credential; + if (account_name.has_value() && account_key.has_value()) + { + storage_shared_key_credential = + std::make_shared(*account_name, *account_key); + } + + std::unique_ptr blob_service_client; + if (storage_shared_key_credential) + { + blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); + } + else + { + blob_service_client = std::make_unique(connection_url); + } + + bool container_exists = containerExists(blob_service_client, container); + + std::string final_url; + size_t pos = connection_url.find('?'); + if (pos != std::string::npos) + { + auto url_without_sas = connection_url.substr(0, pos); + final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + container + + connection_url.substr(pos); + } + else + final_url + = connection_url + (connection_url.back() == '/' ? 
"" : "/") + container; + + if (container_exists) + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); + } + else + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); + try + { + result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); + } + catch (const Azure::Storage::StorageException & e) + { + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.") + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); + } + else + { + throw; + } + } + } + } + + return result; +} + +void StorageAzureBlobConfiguration::fromNamedCollection(const NamedCollection & collection) +{ + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + + if (collection.has("connection_string")) + { + connection_url = collection.get("connection_string"); + is_connection_string = true; + } + + if (collection.has("storage_account_url")) + { + connection_url = collection.get("storage_account_url"); + is_connection_string = false; + } + + container = collection.get("container"); + blob_path = collection.get("blob_path"); + + if (collection.has("account_name")) + account_name = collection.get("account_name"); + + if (collection.has("account_key")) + account_key = collection.get("account_key"); + + structure = collection.getOrDefault("structure", "auto"); + format = collection.getOrDefault("format", format); + compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + + blobs_paths = {blob_path}; + if (format == "auto") + format = FormatFactory::instance().getFormatFromFileName(blob_path, true); +} + +void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) +{ + if (engine_args.size() < 3 || engine_args.size() > (with_structure ? 
8 : 7)) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage AzureBlobStorage requires 3 to 7 arguments (8 with structure): " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])"); + } + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); + + std::unordered_map engine_args_to_idx; + + connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + is_connection_string = isConnectionString(connection_url); + + container = checkAndGetLiteralArgument(engine_args[1], "container"); + blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + + auto is_format_arg = [] (const std::string & s) -> bool + { + return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); + }; + + if (engine_args.size() == 4) + { + /// The fourth argument is either a format name or, with structure, e.g. 'c1 UInt64, c2 UInt64'. + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + format = fourth_arg; + } + else + { + if (with_structure) + structure = fourth_arg; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); + } + } + else if (engine_args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + format = fourth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + } + } + else if (engine_args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + if (with_structure) + { + format = fourth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + structure = checkAndGetLiteralArgument(engine_args[5], "structure"); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (is_format_arg(sixth_arg)) + format = sixth_arg; + else + { + if (with_structure) + structure = sixth_arg; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + } + } + } + else if (engine_args.size() == 7) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (!with_structure && is_format_arg(fourth_arg)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (!is_format_arg(sixth_arg)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + format = sixth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); + } + } + else if (with_structure && engine_args.size() == 8) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key");
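/// Full 8-argument form (only valid when with_structure is true): connection_string|storage_account_url,
/// container_name, blobpath, account_name, account_key, format, compression, structure.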
+ auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (!is_format_arg(sixth_arg)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + format = sixth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); + structure = checkAndGetLiteralArgument(engine_args[7], "structure"); + } + + blobs_paths = {blob_path}; + + if (format == "auto") + format = FormatFactory::instance().getFormatFromFileName(blob_path, true); +} + +void StorageAzureBlobConfiguration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + if (args.size() < 3 || args.size() > 8) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "StorageObjectStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + } + + auto structure_literal = std::make_shared(structure_); + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + if (args.size() == 3) + { + /// Add format=auto & compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else if (args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + if (is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add format=auto & compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + } + args.push_back(structure_literal); + } + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 7) + { + args.push_back(structure_literal); + } + else if (args.size() == 8) + { + args.back() = structure_literal; + } + } +} + +} diff --git a/src/Storages/ObjectStorage/AzureConfiguration.h b/src/Storages/ObjectStorage/AzureConfiguration.h new file mode 100644 index 00000000000..40d718d7690 --- /dev/null +++ b/src/Storages/ObjectStorage/AzureConfiguration.h @@ -0,0 +1,54 @@ +#pragma once +#include +#include + +namespace DB +{ +class BackupFactory; + +class StorageAzureBlobConfiguration : public StorageObjectStorageConfiguration +{ + friend class BackupReaderAzureBlobStorage; + friend class BackupWriterAzureBlobStorage; + friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); + +public: + StorageAzureBlobConfiguration() = default; + StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other); + + Path getPath() const override { return blob_path; } + void setPath(const Path & path) override { blob_path = path; } + + const Paths & getPaths() const override { return blobs_paths; } + Paths & getPaths() override { return blobs_paths; } + + String getDataSourceDescription() override { return fs::path(connection_url) / container; } + String getNamespace() const override { return container; } + + void check(ContextPtr context) const override; + StorageObjectStorageConfigurationPtr clone() override; + ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context); + +protected: + using AzureClient = Azure::Storage::Blobs::BlobContainerClient; + using AzureClientPtr = std::unique_ptr; + + std::string connection_url; + bool is_connection_string; + + std::optional account_name; + std::optional account_key; + + std::string container; + std::string blob_path; + std::vector blobs_paths; + + AzureClientPtr createClient(bool is_read_only); + AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); +}; + +} diff --git a/src/Storages/ObjectStorage/Configuration.h b/src/Storages/ObjectStorage/Configuration.h new file mode 100644 index 00000000000..708041980e3 --- /dev/null +++ b/src/Storages/ObjectStorage/Configuration.h @@ -0,0 +1,55 @@ +#pragma once +#include +#include + +namespace DB +{ + +class StorageObjectStorageConfiguration; +using StorageObjectStorageConfigurationPtr = std::shared_ptr; + +class StorageObjectStorageConfiguration +{ +public: + StorageObjectStorageConfiguration() = default; + virtual ~StorageObjectStorageConfiguration() = default; + + using Path = std::string; + using Paths = std::vector; + + virtual Path getPath() const = 0; + virtual void setPath(const Path & path) = 0; + + virtual const Paths & getPaths() const = 0; + virtual Paths & getPaths() = 0; + + virtual String getDataSourceDescription() = 0; + virtual String getNamespace() const = 0; + + bool isPathWithGlobs() const { return getPath().find_first_of("*?{") != std::string::npos; } + bool isNamespaceWithGlobs() const { return getNamespace().find_first_of("*?{") != std::string::npos; } + + std::string getPathWithoutGlob() const { return getPath().substr(0, getPath().find_first_of("*?{")); } + + virtual bool withWildcard() 
const
+ {
+ static const String PARTITION_ID_WILDCARD = "{_partition_id}";
+ return getPath().find(PARTITION_ID_WILDCARD) != String::npos;
+ }
+
+ virtual void check(ContextPtr context) const = 0;
+ virtual StorageObjectStorageConfigurationPtr clone() = 0;
+
+ virtual ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT
+
+ virtual void fromNamedCollection(const NamedCollection & collection) = 0;
+ virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0;
+
+ String format = "auto";
+ String compression_method = "auto";
+ String structure = "auto";
+};
+
+}
diff --git a/src/Storages/ObjectStorage/HDFSConfiguration.h b/src/Storages/ObjectStorage/HDFSConfiguration.h
new file mode 100644
index 00000000000..f42cedf459d
--- /dev/null
+++ b/src/Storages/ObjectStorage/HDFSConfiguration.h
@@ -0,0 +1,81 @@
+#pragma once
+#include "config.h"
+
+#if USE_HDFS
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+class StorageHDFSConfiguration : public StorageObjectStorageConfiguration
+{
+public:
+ Path getPath() const override { return path; }
+ void setPath(const Path & path_) override { path = path_; }
+
+ const Paths & getPaths() const override { return paths; }
+ Paths & getPaths() override { return paths; }
+
+ String getNamespace() const override { return ""; }
+ String getDataSourceDescription() override { return url; }
+
+ void check(ContextPtr context) const override
+ {
+ context->getRemoteHostFilter().checkURL(Poco::URI(url));
+ checkHDFSURL(url);
+ }
+ StorageObjectStorageConfigurationPtr clone() override
+ {
+ auto configuration = std::make_shared<StorageHDFSConfiguration>();
+ configuration->url = url;
+ configuration->path = path;
+ configuration->paths = paths;
+ return configuration;
+ }
+
+ ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override /// NOLINT
+ {
+ UNUSED(is_readonly);
+ auto settings = std::make_unique<HDFSObjectStorageSettings>();
+ return std::make_shared<HDFSObjectStorage>(url, std::move(settings), context->getConfigRef());
+ }
+
+ void fromNamedCollection(const NamedCollection &) override {}
+ void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override
+ {
+ url = checkAndGetLiteralArgument<String>(args[0], "url");
+
+ if (args.size() > 1)
+ format = checkAndGetLiteralArgument<String>(args[1], "format_name");
+
+ if (format == "auto")
+ format = FormatFactory::instance().getFormatFromFileName(url, true);
+
+ if (args.size() == 3)
+ compression_method = checkAndGetLiteralArgument<String>(args[2], "compression_method");
+ }
+ static void addStructureToArgs(ASTs &, const String &, ContextPtr) {}
+
+private:
+ String url;
+ String path;
+ std::vector<String> paths;
+};
+
+}
+
+#endif
diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h
new file mode 100644
index 00000000000..248700e2edf
--- /dev/null
+++ b/src/Storages/ObjectStorage/ReadBufferIterator.h
@@ -0,0 +1,197 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
+}
+
+template <typename StorageSettings>
+class ReadBufferIterator : public IReadBufferIterator, WithContext
+{
+public:
+ using Storage = StorageObjectStorage<StorageSettings>;
+ using Source = StorageObjectStorageSource<StorageSettings>;
+ using FileIterator = std::shared_ptr<typename Source::IIterator>;
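+
+ /// Drives schema inference: each next() call either returns a read buffer over the
+ /// next file to sample, or, when the schema cache already holds the columns for the
+ /// keys seen so far, returns no buffer and the cached ColumnsDescription instead.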
using ObjectInfos = typename Storage::ObjectInfos; + + ReadBufferIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const FileIterator & file_iterator_, + const std::optional & format_settings_, + ObjectInfos & read_keys_, + const ContextPtr & context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , file_iterator(file_iterator_) + , format_settings(format_settings_) + , storage_settings(StorageSettings::create(context_->getSettingsRef())) + , read_keys(read_keys_) + , prev_read_keys_size(read_keys_.size()) + { + } + + std::pair, std::optional> next() override + { + /// For default mode check cached columns for currently read keys on first iteration. + if (first && storage_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns}; + } + + current_object_info = file_iterator->next(0); + if (current_object_info->relative_path.empty()) + { + if (first) + { + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, " + "because there are no files with provided path. " + "You must specify table structure manually", + configuration->format); + } + return {nullptr, std::nullopt}; + } + + first = false; + + /// File iterator could get new keys after new iteration, + /// check them in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT + && read_keys.size() > prev_read_keys_size) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + prev_read_keys_size = read_keys.size(); + if (columns_from_cache) + return {nullptr, columns_from_cache}; + } + else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + { + ObjectInfos paths = {current_object_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) + return {nullptr, columns_from_cache}; + } + + first = false; + + std::unique_ptr read_buffer = object_storage->readObject( + StoredObject(current_object_info->relative_path), + getContext()->getReadSettings(), + {}, + current_object_info->metadata.size_bytes); + + read_buffer = wrapReadBufferWithCompressionMethod( + std::move(read_buffer), + chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + + return {std::move(read_buffer), std::nullopt}; + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (storage_settings.schema_inference_use_cache) + { + Storage::getSchemaCache(getContext()).addNumRows( + getKeyForSchemaCache(current_object_info->relative_path), num_rows); + } + } + + void setSchemaToLastFile(const ColumnsDescription & columns) override + { + if (storage_settings.schema_inference_use_cache + && storage_settings.schema_inference_mode == SchemaInferenceMode::UNION) + { + Storage::getSchemaCache(getContext()).addColumns( + getKeyForSchemaCache(current_object_info->relative_path), columns); + } + } + + void setResultingSchema(const ColumnsDescription & columns) override + { + if (storage_settings.schema_inference_use_cache + && storage_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + 
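+ /// In DEFAULT mode the resulting schema describes the whole file set,
+ /// so it is registered in the cache for all read keys at once: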
Storage::getSchemaCache(getContext()).addManyColumns(getPathsForSchemaCache(), columns);
+ }
+ }
+
+ String getLastFileName() const override { return current_object_info->relative_path; }
+
+private:
+ SchemaCache::Key getKeyForSchemaCache(const String & path) const
+ {
+ auto source = fs::path(configuration->getDataSourceDescription()) / path;
+ return DB::getKeyForSchemaCache(source, configuration->format, format_settings, getContext());
+ }
+
+ SchemaCache::Keys getPathsForSchemaCache() const
+ {
+ Strings sources;
+ sources.reserve(read_keys.size());
+ std::transform(
+ read_keys.begin(), read_keys.end(),
+ std::back_inserter(sources),
+ [&](const auto & elem)
+ {
+ return fs::path(configuration->getDataSourceDescription()) / elem->relative_path;
+ });
+ return DB::getKeysForSchemaCache(sources, configuration->format, format_settings, getContext());
+ }
+
+ std::optional<ColumnsDescription> tryGetColumnsFromCache(
+ const ObjectInfos::iterator & begin,
+ const ObjectInfos::iterator & end)
+ {
+ if (!storage_settings.schema_inference_use_cache)
+ return std::nullopt;
+
+ auto & schema_cache = Storage::getSchemaCache(getContext());
+ for (auto it = begin; it < end; ++it)
+ {
+ const auto & object_info = (*it);
+ auto get_last_mod_time = [&]() -> std::optional<time_t>
+ {
+ if (object_info->metadata.last_modified)
+ return object_info->metadata.last_modified->epochMicroseconds();
+ else
+ {
+ object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path);
+ return object_info->metadata.last_modified->epochMicroseconds();
+ }
+ };
+
+ auto cache_key = getKeyForSchemaCache(object_info->relative_path);
+ auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
+ if (columns)
+ return columns;
+ }
+
+ return std::nullopt;
+ }
+
+ ObjectStoragePtr object_storage;
+ const Storage::ConfigurationPtr configuration;
+ const FileIterator file_iterator;
+ const std::optional<FormatSettings> & format_settings;
+ const StorageObjectStorageSettings storage_settings;
+ ObjectInfos & read_keys;
+
+ size_t prev_read_keys_size;
+ Storage::ObjectInfoPtr current_object_info;
+ bool first = true;
+};
+}
diff --git a/src/Storages/ObjectStorage/ReadFromObjectStorage.h b/src/Storages/ObjectStorage/ReadFromObjectStorage.h
new file mode 100644
index 00000000000..9cb77dcc25e
--- /dev/null
+++ b/src/Storages/ObjectStorage/ReadFromObjectStorage.h
@@ -0,0 +1,105 @@
+#pragma once
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+template <typename StorageSettings>
+class ReadFromStorageObjectStorage : public SourceStepWithFilter
+{
+public:
+ using Storage = StorageObjectStorage<StorageSettings>;
+ using Source = StorageObjectStorageSource<StorageSettings>;
+
+ ReadFromStorageObjectStorage(
+ ObjectStoragePtr object_storage_,
+ Storage::ConfigurationPtr configuration_,
+ const String & name_,
+ const NamesAndTypesList & virtual_columns_,
+ const std::optional<FormatSettings> & format_settings_,
+ bool distributed_processing_,
+ ReadFromFormatInfo info_,
+ const bool need_only_count_,
+ ContextPtr context_,
+ size_t max_block_size_,
+ size_t num_streams_)
+ : SourceStepWithFilter(DataStream{.header = info_.source_header})
+ , object_storage(object_storage_)
+ , configuration(configuration_)
+ , context(std::move(context_))
+ , info(std::move(info_))
+ , virtual_columns(virtual_columns_)
+ , format_settings(format_settings_)
+ , name(name_ + "Source")
+ , need_only_count(need_only_count_)
+ , max_block_size(max_block_size_)
+ , num_streams(num_streams_)
+ , distributed_processing(distributed_processing_)
+ {
+ }
+
+ std::string getName() const override { return name; }
+
+ void applyFilters()
override + { + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + createIterator(predicate); + } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + createIterator(nullptr); + + Pipes pipes; + for (size_t i = 0; i < num_streams; ++i) + { + pipes.emplace_back(std::make_shared( + getName(), object_storage, configuration, info, format_settings, + context, max_block_size, iterator_wrapper, need_only_count)); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + if (pipe.empty()) + pipe = Pipe(std::make_shared(info.source_header)); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); + } + +private: + ObjectStoragePtr object_storage; + Storage::ConfigurationPtr configuration; + ContextPtr context; + + const ReadFromFormatInfo info; + const NamesAndTypesList virtual_columns; + const std::optional format_settings; + const String name; + const bool need_only_count; + const size_t max_block_size; + const size_t num_streams; + const bool distributed_processing; + + std::shared_ptr iterator_wrapper; + + void createIterator(const ActionsDAG::Node * predicate) + { + if (!iterator_wrapper) + { + iterator_wrapper = Source::createFileIterator( + configuration, object_storage, distributed_processing, context, + predicate, virtual_columns, nullptr, context->getFileProgressCallback()); + } + } +}; + +} diff --git a/src/Storages/ObjectStorage/S3Configuration.cpp b/src/Storages/ObjectStorage/S3Configuration.cpp new file mode 100644 index 00000000000..5a5412019f5 --- /dev/null +++ b/src/Storages/ObjectStorage/S3Configuration.cpp @@ -0,0 +1,491 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +static const std::unordered_set required_configuration_keys = { + "url", +}; + +static const std::unordered_set optional_configuration_keys = { + "format", + "compression", + "compression_method", + "structure", + "access_key_id", + "secret_access_key", + "session_token", + "filename", + "use_environment_credentials", + "max_single_read_retries", + "min_upload_part_size", + "upload_part_size_multiply_factor", + "upload_part_size_multiply_parts_count_threshold", + "max_single_part_upload_size", + "max_connections", + "expiration_window_seconds", + "no_sign_request" +}; + +String StorageS3Configuration::getDataSourceDescription() +{ + return fs::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; +} + +void StorageS3Configuration::check(ContextPtr context) const +{ + context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); + context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); +} + +StorageObjectStorageConfigurationPtr StorageS3Configuration::clone() +{ + auto configuration = std::make_shared(); + configuration->url = url; + configuration->auth_settings = auth_settings; + configuration->request_settings = request_settings; + configuration->static_configuration = static_configuration; + configuration->headers_from_ast = headers_from_ast; + configuration->keys = keys; + configuration->initialized = initialized; + return configuration; +} + +ObjectStoragePtr StorageS3Configuration::createOrUpdateObjectStorage(ContextPtr 
context, bool /* is_readonly */) /// NOLINT +{ + auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); + request_settings = s3_settings.request_settings; + request_settings.updateFromSettings(context->getSettings()); + + if (!initialized || (!static_configuration && auth_settings.hasUpdates(s3_settings.auth_settings))) + { + auth_settings.updateFrom(s3_settings.auth_settings); + keys[0] = url.key; + initialized = true; + } + + const auto & config = context->getConfigRef(); + auto s3_capabilities = S3Capabilities + { + .support_batch_delete = config.getBool("s3.support_batch_delete", true), + .support_proxy = config.getBool("s3.support_proxy", config.has("s3.proxy")), + }; + + auto s3_storage_settings = std::make_unique( + request_settings, + config.getUInt64("s3.min_bytes_for_seek", 1024 * 1024), + config.getInt("s3.list_object_keys_size", 1000), + config.getInt("s3.objects_chunk_size_to_delete", 1000), + config.getBool("s3.readonly", false)); + + auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); + auto client = createClient(context); + std::string disk_name = "StorageS3"; + + return std::make_shared( + std::move(client), std::move(s3_storage_settings), url, s3_capabilities, key_generator, /*disk_name*/disk_name); +} + +std::unique_ptr StorageS3Configuration::createClient(ContextPtr context) +{ + const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); + const Settings & local_settings = context->getSettingsRef(); + + auto client_configuration = S3::ClientFactory::instance().createClientConfiguration( + auth_settings.region, + context->getRemoteHostFilter(), + static_cast(global_settings.s3_max_redirects), + static_cast(global_settings.s3_retry_attempts), + global_settings.enable_s3_requests_logging, + /* for_disk_s3 = */ false, + request_settings.get_request_throttler, + request_settings.put_request_throttler, + url.uri.getScheme()); + + client_configuration.endpointOverride = url.endpoint; + client_configuration.maxConnections = static_cast(request_settings.max_connections); + client_configuration.http_connection_pool_size = global_settings.s3_http_connection_pool_size; + + auto headers = auth_settings.headers; + if (!headers_from_ast.empty()) + headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); + + client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; + + S3::ClientSettings client_settings{ + .use_virtual_addressing = url.is_virtual_hosted_style, + .disable_checksum = local_settings.s3_disable_checksum, + .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), + }; + + auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, + auth_settings.secret_access_key, + auth_settings.session_token); + + auto credentials_configuration = S3::CredentialsConfiguration + { + auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), + auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), + auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), + auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), + }; + + return S3::ClientFactory::instance().create( + client_configuration, + client_settings, + credentials.GetAWSAccessKeyId(), 
+ credentials.GetAWSSecretKey(),
+ auth_settings.server_side_encryption_customer_key_base64,
+ auth_settings.server_side_encryption_kms_config,
+ std::move(headers),
+ credentials_configuration);
+}
+
+void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection)
+{
+ validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys);
+
+ auto filename = collection.getOrDefault<String>("filename", "");
+ if (!filename.empty())
+ url = S3::URI(std::filesystem::path(collection.get<String>("url")) / filename);
+ else
+ url = S3::URI(collection.get<String>("url"));
+
+ auth_settings.access_key_id = collection.getOrDefault<String>("access_key_id", "");
+ auth_settings.secret_access_key = collection.getOrDefault<String>("secret_access_key", "");
+ auth_settings.use_environment_credentials = collection.getOrDefault<UInt64>("use_environment_credentials", 1);
+ auth_settings.no_sign_request = collection.getOrDefault<bool>("no_sign_request", false);
+ auth_settings.expiration_window_seconds = collection.getOrDefault<UInt64>("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS);
+
+ format = collection.getOrDefault<String>("format", format);
+ compression_method = collection.getOrDefault<String>("compression_method", collection.getOrDefault<String>("compression", "auto"));
+ structure = collection.getOrDefault<String>("structure", "auto");
+
+ request_settings = S3Settings::RequestSettings(collection);
+
+ static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value();
+
+ keys = {url.key};
+
+ if (format == "auto")
+ format = FormatFactory::instance().getFormatFromFileName(url.key, true);
+}
+
+void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_structure)
+{
+ /// Supported signatures:
+ /// S3('url')
+ /// S3('url', 'format')
+ /// S3('url', 'format', 'compression')
+ /// S3('url', NOSIGN)
+ /// S3('url', NOSIGN, 'format')
+ /// S3('url', NOSIGN, 'format', 'compression')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression')
+ /// with the optional headers() function.
+
+ size_t count = StorageURL::evalArgsAndCollectHeaders(args, headers_from_ast, context);
+
+ if (count == 0 || count > (with_structure ? 7 : 6))
+ throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+ "Storage S3 requires 1 to 6 arguments (1 to 7 when a structure argument is allowed): "
+ "url, [NOSIGN | access_key_id, secret_access_key], [session_token], format, [compression_method]");
+
+ std::unordered_map<std::string_view, size_t> engine_args_to_idx;
+ bool no_sign_request = false;
+
+ /// For 2 arguments we support 2 possible variants:
+ /// - s3(source, format)
+ /// - s3(source, NOSIGN)
+ /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not.
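+ /// E.g. s3('https://bucket.s3.amazonaws.com/data.tsv', 'TSV') treats the second
+ /// argument as a format, while s3('https://...', NOSIGN) only disables request signing.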
+ if (count == 2) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + no_sign_request = true; + else + engine_args_to_idx = {{"format", 1}}; + } + /// For 3 arguments we support 2 possible variants: + /// - s3(source, format, compression_method) + /// - s3(source, access_key_id, secret_access_key) + /// - s3(source, NOSIGN, format) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. + else if (count == 3) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + engine_args_to_idx = {{"format", 2}}; + } + else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { + if (with_structure) + engine_args_to_idx = {{"format", 1}, {"structure", 2}}; + else + engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; + } + else + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; + } + /// For 4 arguments we support 3 possible variants: + /// if with_structure == 0: + /// - s3(source, access_key_id, secret_access_key, session_token) + /// - s3(source, access_key_id, secret_access_key, format) + /// - s3(source, NOSIGN, format, compression_method) + /// if with_structure == 1: + /// - s3(source, format, structure, compression_method), + /// - s3(source, access_key_id, secret_access_key, format), + /// - s3(source, access_key_id, secret_access_key, session_token) + /// - s3(source, NOSIGN, format, structure) + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. + else if (count == 4) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "access_key_id/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + if (with_structure) + engine_args_to_idx = {{"format", 2}, {"structure", 3}}; + else + engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; + } + else if (with_structure && (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg))) + { + engine_args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; + } + } + } + /// For 5 arguments we support 2 possible variants: + /// if with_structure == 0: + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, access_key_id, secret_access_key, format, compression) + /// if with_structure == 1: + /// - s3(source, access_key_id, secret_access_key, format, structure) + /// - s3(source, access_key_id, secret_access_key, session_token, format) + /// - s3(source, NOSIGN, format, structure, compression_method) + else if (count == 5) + { + if (with_structure) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); + if (boost::iequals(second_arg, "NOSIGN")) + { + no_sign_request = true; + engine_args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == 
"auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression_method", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + else if (count == 6) + { + if (with_structure) + { + /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) + /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) + /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; + } + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; + } + } + else if (with_structure && count == 7) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; + } + + /// This argument is always the first + url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + + if (engine_args_to_idx.contains("format")) + { + format = checkAndGetLiteralArgument(args[engine_args_to_idx["format"]], "format"); + /// Set format to configuration only of it's not 'auto', + /// because we can have default format set in configuration. 
+ if (format != "auto") + format = format; + } + + if (engine_args_to_idx.contains("structure")) + structure = checkAndGetLiteralArgument(args[engine_args_to_idx["structure"]], "structure"); + + if (engine_args_to_idx.contains("compression_method")) + compression_method = checkAndGetLiteralArgument(args[engine_args_to_idx["compression_method"]], "compression_method"); + + if (engine_args_to_idx.contains("access_key_id")) + auth_settings.access_key_id = checkAndGetLiteralArgument(args[engine_args_to_idx["access_key_id"]], "access_key_id"); + + if (engine_args_to_idx.contains("secret_access_key")) + auth_settings.secret_access_key = checkAndGetLiteralArgument(args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); + + if (engine_args_to_idx.contains("session_token")) + auth_settings.session_token = checkAndGetLiteralArgument(args[engine_args_to_idx["session_token"]], "session_token"); + + if (no_sign_request) + auth_settings.no_sign_request = no_sign_request; + + static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); + auth_settings.no_sign_request = no_sign_request; + + keys = {url.key}; + + // if (format == "auto" && get_format_from_file) + if (format == "auto") + format = FormatFactory::instance().getFormatFromFileName(url.key, true); +} + +void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + HTTPHeaderEntries tmp_headers; + size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); + + if (count == 0 || count > 6) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to 6 arguments in table function, got {}", count); + + auto structure_literal = std::make_shared(structure_); + + /// s3(s3_url) + if (count == 1) + { + /// Add format=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(s3_url, format) or s3(s3_url, NOSIGN) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. + else if (count == 2) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// If there is NOSIGN, add format=auto before structure. + if (boost::iequals(second_arg, "NOSIGN")) + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(source, format, structure) or + /// s3(source, access_key_id, secret_access_key) or + /// s3(source, NOSIGN, format) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. + else if (count == 3) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + args.push_back(structure_literal); + } + else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { + args[count - 1] = structure_literal; + } + else + { + /// Add format=auto before structure argument. 
+ args.push_back(std::make_shared<ASTLiteral>("auto"));
+ args.push_back(structure_literal);
+ }
+ }
+ /// s3(source, format, structure, compression_method) or
+ /// s3(source, access_key_id, secret_access_key, format) or
+ /// s3(source, NOSIGN, format, structure)
+ /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, a format name or neither.
+ else if (count == 4)
+ {
+ auto second_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
+ if (boost::iequals(second_arg, "NOSIGN"))
+ {
+ args[count - 1] = structure_literal;
+ }
+ else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg))
+ {
+ args[count - 2] = structure_literal;
+ }
+ else
+ {
+ args.push_back(structure_literal);
+ }
+ }
+ /// s3(source, access_key_id, secret_access_key, format, structure) or
+ /// s3(source, NOSIGN, format, structure, compression_method)
+ /// We can distinguish them by looking at the 2-nd argument: check if it's the NOSIGN keyword or not.
+ else if (count == 5)
+ {
+ auto second_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
+ if (boost::iequals(second_arg, "NOSIGN"))
+ {
+ args[count - 2] = structure_literal;
+ }
+ else
+ {
+ args[count - 1] = structure_literal;
+ }
+ }
+ /// s3(source, access_key_id, secret_access_key, format, structure, compression)
+ else if (count == 6)
+ {
+ args[count - 2] = structure_literal;
+ }
+ }
+}
+
+}
diff --git a/src/Storages/ObjectStorage/S3Configuration.h b/src/Storages/ObjectStorage/S3Configuration.h
new file mode 100644
index 00000000000..34f5735e02a
--- /dev/null
+++ b/src/Storages/ObjectStorage/S3Configuration.h
@@ -0,0 +1,46 @@
+#pragma once
+#include
+#include
+#include
+
+namespace DB
+{
+
+class StorageS3Configuration : public StorageObjectStorageConfiguration
+{
+public:
+ Path getPath() const override { return url.key; }
+ void setPath(const Path & path) override { url.key = path; }
+
+ const Paths & getPaths() const override { return keys; }
+ Paths & getPaths() override { return keys; }
+
+ String getNamespace() const override { return url.bucket; }
+ String getDataSourceDescription() override;
+
+ void check(ContextPtr context) const override;
+ StorageObjectStorageConfigurationPtr clone() override;
+
+ ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT
+
+ void fromNamedCollection(const NamedCollection & collection) override;
+ void fromAST(ASTs & args, ContextPtr context, bool with_structure) override;
+ static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context);
+
+private:
+ S3::URI url;
+ S3::AuthSettings auth_settings;
+ S3Settings::RequestSettings request_settings;
+ /// If the S3 configuration was passed from the AST, it is static.
+ /// If it comes from the server config, it can be changed on config reload.
+ /// Headers from the AST are part of the static configuration.
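+ /// E.g. s3('url', 'key_id', 'secret') pins the credentials in the query itself,
+ /// while a plain s3('url') keeps picking up credential changes from the server config.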
+ HTTPHeaderEntries headers_from_ast; + std::vector keys; + + std::unique_ptr createClient(ContextPtr context); + + bool initialized = false; +}; + +} diff --git a/src/Storages/ObjectStorage/Settings.h b/src/Storages/ObjectStorage/Settings.h new file mode 100644 index 00000000000..015cf9bc01d --- /dev/null +++ b/src/Storages/ObjectStorage/Settings.h @@ -0,0 +1,86 @@ +#pragma once +#include +#include +#include + +namespace CurrentMetrics +{ + extern const Metric ObjectStorageAzureThreads; + extern const Metric ObjectStorageAzureThreadsActive; + extern const Metric ObjectStorageAzureThreadsScheduled; + + extern const Metric ObjectStorageS3Threads; + extern const Metric ObjectStorageS3ThreadsActive; + extern const Metric ObjectStorageS3ThreadsScheduled; +} + +namespace DB +{ + +struct StorageObjectStorageSettings +{ + bool truncate_on_insert; + bool create_new_file_on_insert; + bool schema_inference_use_cache; + SchemaInferenceMode schema_inference_mode; +}; + +struct S3StorageSettings +{ + static StorageObjectStorageSettings create(const Settings & settings) + { + return StorageObjectStorageSettings{ + .truncate_on_insert = settings.s3_truncate_on_insert, + .create_new_file_on_insert = settings.s3_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, + .schema_inference_mode = settings.schema_inference_mode, + }; + } + + static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_s3"; + + static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT +}; + +struct AzureStorageSettings +{ + static StorageObjectStorageSettings create(const Settings & settings) + { + return StorageObjectStorageSettings{ + .truncate_on_insert = settings.azure_truncate_on_insert, + .create_new_file_on_insert = settings.azure_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, + .schema_inference_mode = settings.schema_inference_mode, + }; + } + + static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_azure"; + + static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageAzureThreads; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageAzureThreadsActive; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageAzureThreadsScheduled; } /// NOLINT +}; + +struct HDFSStorageSettings +{ + static StorageObjectStorageSettings create(const Settings & settings) + { + return StorageObjectStorageSettings{ + .truncate_on_insert = settings.hdfs_truncate_on_insert, + .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, + .schema_inference_mode = settings.schema_inference_mode, + }; + } + + static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_hdfs"; + + /// TODO: s3 -> hdfs + static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT + static CurrentMetrics::Metric 
ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp new file mode 100644 index 00000000000..9250ab8ecbe --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -0,0 +1,303 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int DATABASE_ACCESS_DENIED; + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; + +} + +template +std::unique_ptr getStorageMetadata( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfigurationPtr & configuration, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints, + std::optional format_settings, + const String & comment, + const std::string & engine_name, + const ContextPtr & context) +{ + auto storage_metadata = std::make_unique(); + if (columns.empty()) + { + auto fetched_columns = StorageObjectStorage::getTableStructureFromData( + object_storage, configuration, format_settings, context); + storage_metadata->setColumns(fetched_columns); + } + else + { + /// We don't allow special columns. + if (!columns.hasOnlyOrdinary()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Table engine {} doesn't support special columns " + "like MATERIALIZED, ALIAS or EPHEMERAL", + engine_name); + + storage_metadata->setColumns(columns); + } + + storage_metadata->setConstraints(constraints); + storage_metadata->setComment(comment); + return storage_metadata; +} + +template +StorageObjectStorage::StorageObjectStorage( + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const String & engine_name_, + ContextPtr context, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_, + bool distributed_processing_, + ASTPtr partition_by_) + : IStorage(table_id_, getStorageMetadata( + object_storage_, configuration_, columns_, constraints_, format_settings_, + comment, engine_name, context)) + , engine_name(engine_name_) + , virtual_columns(VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage( + getInMemoryMetadataPtr()->getSampleBlock().getNamesAndTypesList())) + , format_settings(format_settings_) + , partition_by(partition_by_) + , distributed_processing(distributed_processing_) + , object_storage(object_storage_) + , configuration(configuration_) +{ + FormatFactory::instance().checkFormatName(configuration->format); + configuration->check(context); + + StoredObjects objects; + for (const auto & key : configuration->getPaths()) + objects.emplace_back(key); +} + +template +Names StorageObjectStorage::getVirtualColumnNames() +{ + return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); +} + +template +bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context, format_settings); +} + +template +bool StorageObjectStorage::prefersLargeBlocks() const +{ + return 
FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration->format); +} + +template +bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const +{ + return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration->format, context); +} + +template +std::pair +StorageObjectStorage::updateConfigurationAndGetCopy(ContextPtr local_context) +{ + std::lock_guard lock(configuration_update_mutex); + auto new_object_storage = configuration->createOrUpdateObjectStorage(local_context); + if (new_object_storage) + object_storage = new_object_storage; + return {configuration, object_storage}; +} + +template +SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) +{ + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + StorageSettings::SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING, + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; +} + +template +void StorageObjectStorage::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + if (partition_by && configuration->withWildcard()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from a partitioned {} storage is not implemented yet", + getName()); + } + + auto this_ptr = std::static_pointer_cast(shared_from_this()); + auto read_from_format_info = prepareReadingFromFormat( + column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && local_context->getSettingsRef().optimize_count_from_files; + + auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); + auto reading = std::make_unique>( + query_object_storage, + query_configuration, + getName(), + virtual_columns, + format_settings, + distributed_processing, + std::move(read_from_format_info), + need_only_count, + local_context, + max_block_size, + num_streams); + + query_plan.addStep(std::move(reading)); +} + +template +SinkToStoragePtr StorageObjectStorage::write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + bool /* async_insert */) +{ + auto insert_query = std::dynamic_pointer_cast(query); + auto partition_by_ast = insert_query + ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) + : nullptr; + bool is_partitioned_implementation = partition_by_ast && configuration->withWildcard(); + + auto sample_block = metadata_snapshot->getSampleBlock(); + auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); + + if (is_partitioned_implementation) + { + return std::make_shared( + object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); + } + + if (configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs()) + { + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "{} key '{}' contains globs, so the table is in readonly mode", + getName(), configuration->getPath()); + } + + if (!storage_settings.truncate_on_insert + && object_storage->exists(StoredObject(configuration->getPath()))) + { + if (storage_settings.create_new_file_on_insert) + { + size_t index = configuration->getPaths().size(); + const auto & first_key = configuration->getPaths()[0]; + auto pos = first_key.find_first_of('.'); + String new_key; + + do + { + new_key = first_key.substr(0, pos) + + "." + + std::to_string(index) + + (pos == std::string::npos ? "" : first_key.substr(pos)); + ++index; + } + while (object_storage->exists(StoredObject(new_key))); + + configuration->getPaths().push_back(new_key); + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. " + "If you want to overwrite it, enable setting [engine_name]_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting [engine_name]_create_new_file_on_insert", + configuration->getNamespace(), configuration->getPaths().back()); + } + } + + return std::make_shared( + object_storage, configuration, format_settings, sample_block, local_context); +} + +template +void StorageObjectStorage::truncate( + const ASTPtr &, + const StorageMetadataPtr &, + ContextPtr, + TableExclusiveLockHolder &) +{ + if (configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs()) + { + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "{} key '{}' contains globs, so the table is in readonly mode and cannot be truncated", + getName(), configuration->getPath()); + } + + StoredObjects objects; + for (const auto & key : configuration->getPaths()) + objects.emplace_back(key); + + object_storage->removeObjectsIfExist(objects); +} + +template +ColumnsDescription StorageObjectStorage::getTableStructureFromData( + ObjectStoragePtr object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ContextPtr context) +{ + using Source = StorageObjectStorageSource; + + ObjectInfos read_keys; + auto file_iterator = Source::createFileIterator( + configuration, object_storage, /* distributed_processing */false, + context, /* predicate */{}, /* virtual_columns */{}, &read_keys); + + ReadBufferIterator read_buffer_iterator( + object_storage, configuration, file_iterator, + format_settings, read_keys, context); + + const bool retry = configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs(); + return readSchemaFromFormat( + configuration->format, format_settings, + read_buffer_iterator, retry, context); +} + +template class StorageObjectStorage; +template class StorageObjectStorage; +template class StorageObjectStorage; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h new file mode 100644 index 00000000000..0b29845ba5c --- /dev/null +++ 
b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -0,0 +1,116 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +struct SelectQueryInfo; +class StorageObjectStorageConfiguration; +struct S3StorageSettings; +struct HDFSStorageSettings; +struct AzureStorageSettings; +class PullingPipelineExecutor; +using ReadTaskCallback = std::function; +class IOutputFormat; +class IInputFormat; +class SchemaCache; + + +template +class StorageObjectStorage : public IStorage +{ +public: + using Configuration = StorageObjectStorageConfiguration; + using ConfigurationPtr = std::shared_ptr; + using ObjectInfo = RelativePathWithMetadata; + using ObjectInfoPtr = std::shared_ptr; + using ObjectInfos = std::vector; + + StorageObjectStorage( + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const String & engine_name_, + ContextPtr context_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_, + bool distributed_processing_ = false, + ASTPtr partition_by_ = nullptr); + + String getName() const override { return engine_name; } + + void read( + QueryPlan & query_plan, + const Names &, + const StorageSnapshotPtr &, + SelectQueryInfo &, + ContextPtr, + QueryProcessingStage::Enum, + size_t, + size_t) override; + + SinkToStoragePtr write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + bool async_insert) override; + + void truncate( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + TableExclusiveLockHolder &) override; + + NamesAndTypesList getVirtuals() const override { return virtual_columns; } + + static Names getVirtualColumnNames(); + + bool supportsPartitionBy() const override { return true; } + + bool supportsSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization() const override { return true; } + + bool supportsSubsetOfColumns(const ContextPtr & context) const; + + bool prefersLargeBlocks() const override; + + bool parallelizeOutputAfterReading(ContextPtr context) const override; + + static SchemaCache & getSchemaCache(const ContextPtr & context); + + static ColumnsDescription getTableStructureFromData( + ObjectStoragePtr object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ContextPtr context); + +protected: + virtual std::pair + updateConfigurationAndGetCopy(ContextPtr local_context); + + const std::string engine_name; + const NamesAndTypesList virtual_columns; + std::optional format_settings; + const ASTPtr partition_by; + const bool distributed_processing; + + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + std::mutex configuration_update_mutex; +}; + +using StorageS3 = StorageObjectStorage; +using StorageAzureBlobStorage = StorageObjectStorage; +using StorageHDFS = StorageObjectStorage; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp new file mode 100644 index 00000000000..414932016f4 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -0,0 +1,107 @@ +#include "Storages/ObjectStorage/StorageObjectStorageCluster.h" + +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + 
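+/// The *Cluster storage distributes a single listing across the cluster: the initiator
+/// iterates over the object storage keys and hands out one relative path per task
+/// callback, so each replica reads a disjoint subset of the matched files.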
+namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +template +StorageObjectStorageCluster::StorageObjectStorageCluster( + const String & cluster_name_, + const Storage::ConfigurationPtr & configuration_, + ObjectStoragePtr object_storage_, + const String & engine_name_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_) + : IStorageCluster(cluster_name_, + table_id_, + getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name)), + structure_argument_was_provided_) + , engine_name(engine_name_) + , configuration{configuration_} + , object_storage(object_storage_) +{ + configuration->check(context_); + StorageInMemoryMetadata storage_metadata; + + if (columns_.empty()) + { + /// `format_settings` is set to std::nullopt, because StorageObjectStorageCluster is used only as table function + auto columns = StorageObjectStorage::getTableStructureFromData( + object_storage, configuration, /*format_settings=*/std::nullopt, context_); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); + + virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage( + storage_metadata.getSampleBlock().getNamesAndTypesList()); +} + +template +void StorageObjectStorageCluster::addColumnsStructureToQuery( + ASTPtr & query, + const String & structure, + const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected SELECT query from table function {}, got '{}'", + engine_name, queryToString(query)); + } + using TableFunction = TableFunctionObjectStorageCluster; + TableFunction::addColumnsStructureToArguments(expression_list->children, structure, context); +} + +template +RemoteQueryExecutor::Extension +StorageObjectStorageCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr &) const +{ + auto iterator = std::make_shared( + object_storage, configuration, predicate, virtual_columns, nullptr); + + auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next(0)->relative_path; }); + return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; +} + + +#if USE_AWS_S3 +template class StorageObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +template class StorageObjectStorageCluster; +#endif + +#if USE_HDFS +template class StorageObjectStorageCluster; +#endif + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h new file mode 100644 index 00000000000..b1f9af14e03 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -0,0 +1,72 @@ +#pragma once + +#include "config.h" + +#include +#include +#include +#include +#include + +namespace DB +{ + +class StorageS3Settings; +class StorageAzureBlobSettings; + +class Context; + +template +class StorageObjectStorageCluster : public IStorageCluster +{ +public: + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + + StorageObjectStorageCluster( + const String & cluster_name_, + const Storage::ConfigurationPtr & configuration_, + ObjectStoragePtr object_storage_, + const String & engine_name_, + const StorageID & table_id_, + const 
ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_); + + std::string getName() const override { return engine_name; } + + NamesAndTypesList getVirtuals() const override { return virtual_columns; } + + RemoteQueryExecutor::Extension + getTaskIteratorExtension( + const ActionsDAG::Node * predicate, + const ContextPtr & context) const override; + + bool supportsSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization() const override { return true; } + +private: + void updateBeforeRead(const ContextPtr & /* context */) override {} + + void addColumnsStructureToQuery( + ASTPtr & query, + const String & structure, + const ContextPtr & context) override; + + const String & engine_name; + const Storage::ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + NamesAndTypesList virtual_columns; +}; + + +#if USE_AWS_S3 +using StorageS3Cluster = StorageObjectStorageCluster; +#endif +#if USE_AZURE_BLOB_STORAGE +using StorageAzureBlobCluster = StorageObjectStorageCluster; +#endif + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h new file mode 100644 index 00000000000..34ab8ebec66 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -0,0 +1,155 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace DB +{ +class StorageObjectStorageSink : public SinkToStorage +{ +public: + StorageObjectStorageSink( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context, + const std::string & blob_path = "") + : SinkToStorage(sample_block_) + , sample_block(sample_block_) + , format_settings(format_settings_) + { + const auto & settings = context->getSettingsRef(); + const auto path = blob_path.empty() ? configuration->getPaths().back() : blob_path; + const auto chosen_compression_method = chooseCompressionMethod(path, configuration->compression_method); + + auto buffer = object_storage->writeObject( + StoredObject(path), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings()); + + write_buf = wrapWriteBufferWithCompressionMethod( + std::move(buffer), + chosen_compression_method, + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); + + writer = FormatFactory::instance().getOutputFormatParallelIfPossible( + configuration->format, *write_buf, sample_block, context, format_settings); + } + + String getName() const override { return "StorageObjectStorageSink"; } + + void consume(Chunk chunk) override + { + std::lock_guard lock(cancel_mutex); + if (cancelled) + return; + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + } + + void onCancel() override + { + std::lock_guard lock(cancel_mutex); + finalize(); + cancelled = true; + } + + void onException(std::exception_ptr exception) override + { + std::lock_guard lock(cancel_mutex); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to proper delete write buffers without finalization. 
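+ /// release() drops the formatter and finalizes the raw write buffer while the
+ /// exception is still in flight, so no destructor has to throw during unwinding.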
+ release(); + } + } + + void onFinish() override + { + std::lock_guard lock(cancel_mutex); + finalize(); + } + +private: + const Block sample_block; + const std::optional format_settings; + + std::unique_ptr write_buf; + OutputFormatPtr writer; + bool cancelled = false; + std::mutex cancel_mutex; + + void finalize() + { + if (!writer) + return; + + try + { + writer->finalize(); + writer->flush(); + write_buf->finalize(); + } + catch (...) + { + /// Stop ParallelFormattingOutputFormat correctly. + release(); + throw; + } + } + + void release() + { + writer.reset(); + write_buf->finalize(); + } +}; + +class PartitionedStorageObjectStorageSink : public PartitionedSink +{ +public: + PartitionedStorageObjectStorageSink( + ObjectStoragePtr object_storage_, + StorageObjectStorageConfigurationPtr configuration_, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ASTPtr & partition_by) + : PartitionedSink(partition_by, context_, sample_block_) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , sample_block(sample_block_) + , context(context_) + { + } + + SinkPtr createSinkForPartition(const String & partition_id) override + { + auto blob = configuration->getPaths().back(); + auto partition_key = replaceWildcards(blob, partition_id); + validatePartitionKey(partition_key, true); + return std::make_shared( + object_storage, + configuration, + format_settings, + sample_block, + context, + partition_key + ); + } + +private: + ObjectStoragePtr object_storage; + StorageObjectStorageConfigurationPtr configuration; + const std::optional format_settings; + const Block sample_block; + const ContextPtr context; +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp new file mode 100644 index 00000000000..9fc7925a6d1 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -0,0 +1,464 @@ +#include "StorageObjectStorageSource.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event EngineFileLikeReadFiles; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_COMPILE_REGEXP; +} + +template +std::shared_ptr::IIterator> +StorageObjectStorageSource::createFileIterator( + Storage::ConfigurationPtr configuration, + ObjectStoragePtr object_storage, + bool distributed_processing, + const ContextPtr & local_context, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns, + ObjectInfos * read_keys, + std::function file_progress_callback) +{ + if (distributed_processing) + return std::make_shared(local_context->getReadTaskCallback()); + + if (configuration->isNamespaceWithGlobs()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + + if (configuration->isPathWithGlobs()) + { + /// Iterate through disclosed globs and make a source for each file + return std::make_shared( + object_storage, configuration, predicate, virtual_columns, read_keys, file_progress_callback); + } + else + { + return std::make_shared( + object_storage, configuration, virtual_columns, read_keys, file_progress_callback); + } +} + +template +StorageObjectStorageSource::GlobIterator::GlobIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const ActionsDAG::Node * 
+    const NamesAndTypesList & virtual_columns_,
+    ObjectInfos * read_keys_,
+    std::function<void(FileProgress)> file_progress_callback_)
+    : object_storage(object_storage_)
+    , configuration(configuration_)
+    , virtual_columns(virtual_columns_)
+    , read_keys(read_keys_)
+    , file_progress_callback(file_progress_callback_)
+{
+    if (configuration->isNamespaceWithGlobs())
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression cannot have wildcards inside namespace name");
+    }
+    else if (configuration->isPathWithGlobs())
+    {
+        const auto key_with_globs = configuration_->getPath();
+        const auto key_prefix = configuration->getPathWithoutGlob();
+        object_storage_iterator = object_storage->iterate(key_prefix);
+
+        matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(key_with_globs));
+        if (matcher->ok())
+        {
+            recursive = key_with_globs == "/**";
+            filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        }
+        else
+        {
+            throw Exception(
+                ErrorCodes::CANNOT_COMPILE_REGEXP,
+                "Cannot compile regex from glob ({}): {}", key_with_globs, matcher->error());
+        }
+    }
+    else
+    {
+        const auto key_with_globs = configuration_->getPath();
+        auto object_metadata = object_storage->getObjectMetadata(key_with_globs);
+        auto object_info = std::make_shared<ObjectInfo>(key_with_globs, object_metadata);
+
+        object_infos.emplace_back(object_info);
+        if (read_keys)
+            read_keys->emplace_back(object_info);
+
+        if (file_progress_callback)
+            file_progress_callback(FileProgress(0, object_metadata.size_bytes));
+
+        is_finished = true;
+    }
+}
+
+template <typename StorageSettings>
+StorageObjectStorageSource<StorageSettings>::ObjectInfoPtr
+StorageObjectStorageSource<StorageSettings>::GlobIterator::next(size_t /* processor */)
+{
+    std::lock_guard lock(next_mutex);
+
+    if (is_finished && index >= object_infos.size())
+        return {};
+
+    bool need_new_batch = object_infos.empty() || index >= object_infos.size();
+
+    if (need_new_batch)
+    {
+        ObjectInfos new_batch;
+        while (new_batch.empty())
+        {
+            auto result = object_storage_iterator->getCurrentBatchAndScheduleNext();
+            if (result.has_value())
+            {
+                new_batch = result.value();
+            }
+            else
+            {
+                is_finished = true;
+                return {};
+            }
+
+            for (auto it = new_batch.begin(); it != new_batch.end();)
+            {
+                if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher))
+                    it = new_batch.erase(it);
+                else
+                    ++it;
+            }
+        }
+
+        index = 0;
+
+        if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(new_batch.size());
+            for (auto & object_info : new_batch)
+                paths.push_back(fs::path(configuration->getNamespace()) / object_info->relative_path);
+
+            VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext());
+        }
+
+        if (read_keys)
+            read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end());
+
+        object_infos = std::move(new_batch);
+        if (file_progress_callback)
+        {
+            for (const auto & object_info : object_infos)
+            {
+                file_progress_callback(FileProgress(0, object_info->metadata.size_bytes));
+            }
+        }
+    }
+
+    size_t current_index = index++;
+    if (current_index >= object_infos.size())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bounds for blob metadata");
+
+    return object_infos[current_index];
+}
+
+template <typename StorageSettings>
+StorageObjectStorageSource<StorageSettings>::KeysIterator::KeysIterator(
+    ObjectStoragePtr object_storage_,
+    Storage::ConfigurationPtr configuration_,
+    const NamesAndTypesList & virtual_columns_,
+    ObjectInfos * read_keys_,
+    std::function<void(FileProgress)> file_progress_callback_)
+    : object_storage(object_storage_)
+    , configuration(configuration_)
+    , virtual_columns(virtual_columns_)
+    , file_progress_callback(file_progress_callback_)
+    , keys(configuration->getPaths())
+{
+    if (read_keys_)
+    {
+        /// TODO: should we add metadata here, since we fetch it anyway when file_progress_callback is passed?
+        for (auto && key : keys)
+        {
+            auto object_info = std::make_shared<ObjectInfo>(key, ObjectMetadata{});
+            read_keys_->emplace_back(object_info);
+        }
+    }
+}
+
+template <typename StorageSettings>
+StorageObjectStorageSource<StorageSettings>::ObjectInfoPtr
+StorageObjectStorageSource<StorageSettings>::KeysIterator::next(size_t /* processor */)
+{
+    size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
+    if (current_index >= keys.size())
+        return {};
+
+    auto key = keys[current_index];
+
+    ObjectMetadata metadata{};
+    if (file_progress_callback)
+    {
+        metadata = object_storage->getObjectMetadata(key);
+        file_progress_callback(FileProgress(0, metadata.size_bytes));
+    }
+
+    return std::make_shared<ObjectInfo>(key, metadata);
+}
+
+template <typename StorageSettings>
+Chunk StorageObjectStorageSource<StorageSettings>::generate()
+{
+    while (true)
+    {
+        if (isCancelled() || !reader)
+        {
+            if (reader)
+                reader->cancel();
+            break;
+        }
+
+        Chunk chunk;
+        if (reader->pull(chunk))
+        {
+            UInt64 num_rows = chunk.getNumRows();
+            total_rows_in_file += num_rows;
+            size_t chunk_size = 0;
+            if (const auto * input_format = reader.getInputFormat())
+                chunk_size = input_format->getApproxBytesReadForChunk();
+            progress(num_rows, chunk_size ? chunk_size : chunk.bytes());
+
+            VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(
+                chunk,
+                read_from_format_info.requested_virtual_columns,
+                fs::path(configuration->getNamespace()) / reader.getRelativePath(),
+                reader.getObjectInfo().metadata.size_bytes);
+
+            return chunk;
+        }
+
+        if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files)
+            addNumRowsToCache(reader.getRelativePath(), total_rows_in_file);
+
+        total_rows_in_file = 0;
+
+        assert(reader_future.valid());
+        reader = reader_future.get();
+
+        if (!reader)
+            break;
+
+        /// Even if the task is finished, the thread may not be freed in the pool yet.
+        /// So wait until it is freed before scheduling a new task.
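+        /// (ThreadPool::wait() returns only after all already-scheduled jobs have finished.)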
+        create_reader_pool.wait();
+        reader_future = createReaderAsync();
+    }
+
+    return {};
+}
+
+template <typename StorageSettings>
+void StorageObjectStorageSource<StorageSettings>::addNumRowsToCache(const String & path, size_t num_rows)
+{
+    String source = fs::path(configuration->getDataSourceDescription()) / path;
+    auto cache_key = getKeyForSchemaCache(source, configuration->format, format_settings, getContext());
+    Storage::getSchemaCache(getContext()).addNumRows(cache_key, num_rows);
+}
+
+template <typename StorageSettings>
+std::optional<size_t> StorageObjectStorageSource<StorageSettings>::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info)
+{
+    String source = fs::path(configuration->getDataSourceDescription()) / object_info->relative_path;
+    auto cache_key = getKeyForSchemaCache(source, configuration->format, format_settings, getContext());
+    auto get_last_mod_time = [&]() -> std::optional<time_t>
+    {
+        auto last_mod = object_info->metadata.last_modified;
+        if (last_mod)
+            return last_mod->epochTime();
+        else
+        {
+            object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path);
+            /// Use the same time unit (seconds) as the branch above, otherwise cache lookups are inconsistent.
+            return object_info->metadata.last_modified->epochTime();
+        }
+    };
+    return Storage::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time);
+}
+
+template <typename StorageSettings>
+StorageObjectStorageSource<StorageSettings>::StorageObjectStorageSource(
+    String name_,
+    ObjectStoragePtr object_storage_,
+    Storage::ConfigurationPtr configuration_,
+    const ReadFromFormatInfo & info,
+    std::optional<FormatSettings> format_settings_,
+    ContextPtr context_,
+    UInt64 max_block_size_,
+    std::shared_ptr<IIterator> file_iterator_,
+    bool need_only_count_)
+    : ISource(info.source_header, false)
+    , WithContext(context_)
+    , name(std::move(name_))
+    , object_storage(object_storage_)
+    , configuration(configuration_)
+    , format_settings(format_settings_)
+    , max_block_size(max_block_size_)
+    , need_only_count(need_only_count_)
+    , read_from_format_info(info)
+    , columns_desc(info.columns_description)
+    , file_iterator(file_iterator_)
+    , create_reader_pool(StorageSettings::ObjectStorageThreads(),
+                         StorageSettings::ObjectStorageThreadsActive(),
+                         StorageSettings::ObjectStorageThreadsScheduled(), 1)
+    , create_reader_scheduler(threadPoolCallbackRunner<ReaderHolder>(create_reader_pool, "Reader"))
+{
+    reader = createReader();
+    if (reader)
+        reader_future = createReaderAsync();
+}
+
+template <typename StorageSettings>
+StorageObjectStorageSource<StorageSettings>::~StorageObjectStorageSource()
+{
+    create_reader_pool.wait();
+}
+
+template <typename StorageSettings>
+StorageObjectStorageSource<StorageSettings>::ReaderHolder
+StorageObjectStorageSource<StorageSettings>::createReader(size_t processor)
+{
+    auto object_info = file_iterator->next(processor);
+    /// The iterator returns a null pointer once it is exhausted, so check it before dereferencing.
+    if (!object_info || object_info->relative_path.empty())
+        return {};
+
+    if (object_info->metadata.size_bytes == 0)
+        object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path);
+
+    QueryPipelineBuilder builder;
+    std::shared_ptr<ISource> source;
+    std::unique_ptr<ReadBuffer> read_buf;
+    std::optional<size_t> num_rows_from_cache = need_only_count
+        && getContext()->getSettingsRef().use_cache_for_count_from_files
+        ? tryGetNumRowsFromCache(object_info)
+        : std::nullopt;
+
+    if (num_rows_from_cache)
+    {
+        /// We should not return a single chunk with the whole number of rows,
+        /// because there is a chance that this chunk will be materialized later
+        /// (it can cause memory problems even with default values in columns or when virtual columns are requested).
+        /// Instead, we use a special ConstChunkGenerator that generates chunks
+        /// with max_block_size rows until the total number of rows is reached.
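+        /// For example: ~1M cached rows with the default max_block_size of 65536
+        /// yield 16 chunks (15 full ones plus a remainder) rather than one huge chunk.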
+ source = std::make_shared( + read_from_format_info.format_header, *num_rows_from_cache, max_block_size); + builder.init(Pipe(source)); + } + else + { + std::optional max_parsing_threads; + if (need_only_count) + max_parsing_threads = 1; + + auto compression_method = chooseCompressionMethod( + object_info->relative_path, configuration->compression_method); + + read_buf = createReadBuffer(object_info->relative_path, object_info->metadata.size_bytes); + + auto input_format = FormatFactory::instance().getInput( + configuration->format, *read_buf, read_from_format_info.format_header, + getContext(), max_block_size, format_settings, max_parsing_threads, + std::nullopt, /* is_remote_fs */ true, compression_method); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_desc.hasDefaults()) + { + builder.addSimpleTransform( + [&](const Block & header) + { + return std::make_shared(header, columns_desc, *input_format, getContext()); + }); + } + + source = input_format; + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, read_from_format_info.requested_columns); + }); + + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + auto current_reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); + + return ReaderHolder{object_info, std::move(read_buf), + std::move(source), std::move(pipeline), std::move(current_reader)}; +} + +template +std::future::ReaderHolder> +StorageObjectStorageSource::createReaderAsync(size_t processor) +{ + return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); +} + +template +std::unique_ptr StorageObjectStorageSource::createReadBuffer(const String & key, size_t object_size) +{ + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + read_settings.enable_filesystem_cache = false; + read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; + + // auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; + // const bool object_too_small = object_size <= 2 * download_buffer_size; + + // Create a read buffer that will prefetch the first ~1 MB of the file. + // When reading lots of tiny files, this prefetching almost doubles the throughput. + // For bigger files, parallel reading is more useful. 
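+    // NOTE: the prefetch branch below is left commented out; the plain readObject()
+    // call at the end of this function is the code path that actually runs.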
+ // if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + // { + // LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); + + // auto async_reader = object_storage->readObjects( + // StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, read_settings); + + // async_reader->setReadUntilEnd(); + // if (read_settings.remote_fs_prefetch) + // async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); + + // return async_reader; + // } + // else + return object_storage->readObject(StoredObject(key), read_settings); +} + +template class StorageObjectStorageSource; +template class StorageObjectStorageSource; +template class StorageObjectStorageSource; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h new file mode 100644 index 00000000000..f68a5d47456 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -0,0 +1,217 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ +template +class StorageObjectStorageSource : public ISource, WithContext +{ + friend class StorageS3QueueSource; +public: + using Source = StorageObjectStorageSource; + using Storage = StorageObjectStorage; + using ObjectInfo = Storage::ObjectInfo; + using ObjectInfoPtr = Storage::ObjectInfoPtr; + using ObjectInfos = Storage::ObjectInfos; + + class IIterator : public WithContext + { + public: + virtual ~IIterator() = default; + + virtual size_t estimatedKeysCount() = 0; + virtual ObjectInfoPtr next(size_t processor) = 0; + }; + + class ReadTaskIterator; + class GlobIterator; + class KeysIterator; + + StorageObjectStorageSource( + String name_, + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration, + const ReadFromFormatInfo & info, + std::optional format_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + bool need_only_count_); + + ~StorageObjectStorageSource() override; + + String getName() const override { return name; } + + Chunk generate() override; + + static std::shared_ptr createFileIterator( + Storage::ConfigurationPtr configuration, + ObjectStoragePtr object_storage, + bool distributed_processing, + const ContextPtr & local_context, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns, + ObjectInfos * read_keys, + std::function file_progress_callback = {}); + +protected: + void addNumRowsToCache(const String & path, size_t num_rows); + std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); + + const String name; + ObjectStoragePtr object_storage; + const Storage::ConfigurationPtr configuration; + const std::optional format_settings; + const UInt64 max_block_size; + const bool need_only_count; + const ReadFromFormatInfo read_from_format_info; + + ColumnsDescription columns_desc; + std::shared_ptr file_iterator; + size_t total_rows_in_file = 0; + + struct ReaderHolder + { + public: + ReaderHolder( + ObjectInfoPtr object_info_, + std::unique_ptr read_buf_, + std::shared_ptr source_, + std::unique_ptr pipeline_, + std::unique_ptr reader_) + : object_info(std::move(object_info_)) + , read_buf(std::move(read_buf_)) + , source(std::move(source_)) + , pipeline(std::move(pipeline_)) + , reader(std::move(reader_)) + { + } + + ReaderHolder() = default; + ReaderHolder(const ReaderHolder & other) = delete; + ReaderHolder & operator=(const ReaderHolder & other) = delete; + ReaderHolder(ReaderHolder && other) noexcept { *this = 
std::move(other); } + + ReaderHolder & operator=(ReaderHolder && other) noexcept + { + /// The order of destruction is important. + /// reader uses pipeline, pipeline uses read_buf. + reader = std::move(other.reader); + pipeline = std::move(other.pipeline); + source = std::move(other.source); + read_buf = std::move(other.read_buf); + object_info = std::move(other.object_info); + return *this; + } + + explicit operator bool() const { return reader != nullptr; } + PullingPipelineExecutor * operator->() { return reader.get(); } + const PullingPipelineExecutor * operator->() const { return reader.get(); } + const String & getRelativePath() const { return object_info->relative_path; } + const ObjectInfo & getObjectInfo() const { return *object_info; } + const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } + + private: + ObjectInfoPtr object_info; + std::unique_ptr read_buf; + std::shared_ptr source; + std::unique_ptr pipeline; + std::unique_ptr reader; + }; + + ReaderHolder reader; + LoggerPtr log = getLogger("StorageObjectStorageSource"); + ThreadPool create_reader_pool; + ThreadPoolCallbackRunner create_reader_scheduler; + std::future reader_future; + + /// Recreate ReadBuffer and Pipeline for each file. + ReaderHolder createReader(size_t processor = 0); + std::future createReaderAsync(size_t processor = 0); + + std::unique_ptr createReadBuffer(const String & key, size_t object_size); +}; + +template +class StorageObjectStorageSource::ReadTaskIterator : public IIterator +{ +public: + explicit ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {} + + size_t estimatedKeysCount() override { return 0; } /// TODO FIXME + + ObjectInfoPtr next(size_t) override { return std::make_shared( callback(), ObjectMetadata{} ); } + +private: + ReadTaskCallback callback; +}; + +template +class StorageObjectStorageSource::GlobIterator : public IIterator +{ +public: + GlobIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + std::function file_progress_callback_ = {}); + + ~GlobIterator() override = default; + + size_t estimatedKeysCount() override { return object_infos.size(); } + + ObjectInfoPtr next(size_t processor) override; + +private: + ObjectStoragePtr object_storage; + Storage::ConfigurationPtr configuration; + ActionsDAGPtr filter_dag; + NamesAndTypesList virtual_columns; + + size_t index = 0; + + ObjectInfos object_infos; + ObjectInfos * read_keys; + ObjectStorageIteratorPtr object_storage_iterator; + bool recursive{false}; + + std::unique_ptr matcher; + + void createFilterAST(const String & any_key); + bool is_finished = false; + std::mutex next_mutex; + + std::function file_progress_callback; +}; + +template +class StorageObjectStorageSource::KeysIterator : public IIterator +{ +public: + KeysIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + std::function file_progress_callback = {}); + + ~KeysIterator() override = default; + + size_t estimatedKeysCount() override { return keys.size(); } + + ObjectInfoPtr next(size_t processor) override; + +private: + const ObjectStoragePtr object_storage; + const Storage::ConfigurationPtr configuration; + const NamesAndTypesList virtual_columns; + const std::function file_progress_callback; + const std::vector keys; + std::atomic index = 0; +}; +} diff 
--git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp
new file mode 100644
index 00000000000..bc9f93690f5
--- /dev/null
+++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp
@@ -0,0 +1,166 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+static void initializeConfiguration(
+    StorageObjectStorageConfiguration & configuration,
+    ASTs & engine_args,
+    ContextPtr local_context,
+    bool with_table_structure)
+{
+    if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context))
+        configuration.fromNamedCollection(*named_collection);
+    else
+        configuration.fromAST(engine_args, local_context, with_table_structure);
+}
+
+template <typename StorageSettings>
+static std::shared_ptr<StorageObjectStorage<StorageSettings>> createStorageObjectStorage(
+    const StorageFactory::Arguments & args,
+    typename StorageObjectStorage<StorageSettings>::ConfigurationPtr configuration,
+    const String & engine_name,
+    ContextPtr context)
+{
+    auto & engine_args = args.engine_args;
+    if (engine_args.empty())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments");
+
+    // Use format settings from the global server context + settings from
+    // the SETTINGS clause of the CREATE query. Settings from the current
+    // session and user are ignored.
+    std::optional<FormatSettings> format_settings;
+    if (args.storage_def->settings)
+    {
+        FormatFactorySettings user_format_settings;
+
+        // Apply changed settings from the global context, but ignore the
+        // unknown ones, because we only have the format settings here.
+        const auto & changes = context->getSettingsRef().changes();
+        for (const auto & change : changes)
+        {
+            if (user_format_settings.has(change.name))
+                user_format_settings.set(change.name, change.value);
+        }
+
+        // Apply changes from the SETTINGS clause, with validation.
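+        // (applyChanges() validates the changes and throws on unknown setting names.)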
+ user_format_settings.applyChanges(args.storage_def->settings->changes); + format_settings = getFormatSettings(context, user_format_settings); + } + else + { + format_settings = getFormatSettings(context); + } + + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + + return std::make_shared>( + configuration, + configuration->createOrUpdateObjectStorage(context), + engine_name, + args.getContext(), + args.table_id, + args.columns, + args.constraints, + args.comment, + format_settings, + /* distributed_processing */ false, + partition_by); +} + +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzure(StorageFactory & factory) +{ + factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) + { + auto context = args.getLocalContext(); + auto configuration = std::make_shared(); + initializeConfiguration(*configuration, args.engine_args, context, false); + return createStorageObjectStorage(args, configuration, "Azure", context); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::AZURE, + }); +} +#endif + +#if USE_AWS_S3 +void registerStorageS3Impl(const String & name, StorageFactory & factory) +{ + factory.registerStorage(name, [=](const StorageFactory::Arguments & args) + { + auto context = args.getLocalContext(); + auto configuration = std::make_shared(); + initializeConfiguration(*configuration, args.engine_args, context, false); + return createStorageObjectStorage(args, configuration, name, context); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +void registerStorageS3(StorageFactory & factory) +{ + return registerStorageS3Impl("S3", factory); +} + +void registerStorageCOS(StorageFactory & factory) +{ + return registerStorageS3Impl("COSN", factory); +} + +void registerStorageOSS(StorageFactory & factory) +{ + return registerStorageS3Impl("OSS", factory); +} + +#endif + +#if USE_HDFS +void registerStorageHDFS(StorageFactory & factory) +{ + factory.registerStorage("HDFS", [=](const StorageFactory::Arguments & args) + { + auto context = args.getLocalContext(); + auto configuration = std::make_shared(); + initializeConfiguration(*configuration, args.engine_args, context, false); + return createStorageObjectStorage(args, configuration, "HDFS", context); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::HDFS, + }); +} +#endif + +void registerStorageObjectStorage(StorageFactory & factory) +{ +#if USE_AWS_S3 + registerStorageS3(factory); + registerStorageCOS(factory); + registerStorageOSS(factory); +#endif +#if USE_AZURE_BLOB_STORAGE + registerStorageAzure(factory); +#endif +#if USE_HDFS + registerStorageHDFS(factory); +#endif +} + +} diff --git a/src/Storages/ObjectStorageConfiguration.h b/src/Storages/ObjectStorageConfiguration.h new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index b4f5f957f76..bd34d1ec093 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -5,9 +5,9 @@ #include #include #include -#include #include #include +#include namespace CurrentMetrics @@ -31,11 +31,11 @@ namespace ErrorCodes extern const 
int LOGICAL_ERROR; } -StorageS3QueueSource::S3QueueKeyWithInfo::S3QueueKeyWithInfo( +StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( const std::string & key_, - std::optional info_, + const ObjectMetadata & object_metadata_, Metadata::ProcessingNodeHolderPtr processing_holder_) - : StorageS3Source::KeyWithInfo(key_, info_) + : Source::ObjectInfo(key_, object_metadata_) , processing_holder(processing_holder_) { } @@ -55,15 +55,15 @@ StorageS3QueueSource::FileIterator::FileIterator( if (sharded_processing) { for (const auto & id : metadata->getProcessingIdsForShard(current_shard)) - sharded_keys.emplace(id, std::deque{}); + sharded_keys.emplace(id, std::deque{}); } } -StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(size_t idx) +StorageS3QueueSource::Source::ObjectInfoPtr StorageS3QueueSource::FileIterator::next(size_t processor) { while (!shutdown_called) { - KeyWithInfoPtr val{nullptr}; + Source::ObjectInfoPtr val{nullptr}; { std::unique_lock lk(sharded_keys_mutex, std::defer_lock); @@ -73,7 +73,7 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si /// we need to check sharded_keys and to next() under lock. lk.lock(); - if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) + if (auto it = sharded_keys.find(processor); it != sharded_keys.end()) { auto & keys = it->second; if (!keys.empty()) @@ -86,24 +86,24 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si { throw Exception(ErrorCodes::LOGICAL_ERROR, "Processing id {} does not exist (Expected ids: {})", - idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); + processor, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); } } if (!val) { - val = glob_iterator->next(); + val = glob_iterator->next(processor); if (val && sharded_processing) { - const auto processing_id_for_key = metadata->getProcessingIdForPath(val->key); - if (idx != processing_id_for_key) + const auto processing_id_for_key = metadata->getProcessingIdForPath(val->relative_path); + if (processor != processing_id_for_key) { if (metadata->isProcessingIdBelongsToShard(processing_id_for_key, current_shard)) { LOG_TEST(log, "Putting key {} into queue of processor {} (total: {})", - val->key, processing_id_for_key, sharded_keys.size()); + val->relative_path, processing_id_for_key, sharded_keys.size()); - if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) + if (auto it = sharded_keys.find(processor); it != sharded_keys.end()) { it->second.push_back(val); } @@ -111,7 +111,7 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si { throw Exception(ErrorCodes::LOGICAL_ERROR, "Processing id {} does not exist (Expected ids: {})", - idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); + processor, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); } } continue; @@ -129,25 +129,25 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si return {}; } - auto processing_holder = metadata->trySetFileAsProcessing(val->key); + auto processing_holder = metadata->trySetFileAsProcessing(val->relative_path); if (shutdown_called) { LOG_TEST(log, "Shutdown was called, stopping file iterator"); return {}; } - LOG_TEST(log, "Checking if can process key {} for processing_id {}", val->key, idx); + LOG_TEST(log, "Checking if can process key {} for processing_id {}", val->relative_path, processor); if (processing_holder) { - return 
std::make_shared(val->key, val->info, processing_holder); + return std::make_shared(val->relative_path, val->metadata, processing_holder); } else if (sharded_processing - && metadata->getFileStatus(val->key)->state == S3QueueFilesMetadata::FileStatus::State::Processing) + && metadata->getFileStatus(val->relative_path)->state == S3QueueFilesMetadata::FileStatus::State::Processing) { throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} is processing by someone else in sharded processing. " - "It is a bug", val->key); + "It is a bug", val->relative_path); } } return {}; @@ -161,7 +161,7 @@ size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() StorageS3QueueSource::StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -190,38 +190,19 @@ StorageS3QueueSource::StorageS3QueueSource( { } -StorageS3QueueSource::~StorageS3QueueSource() -{ - internal_source->create_reader_pool.wait(); -} - String StorageS3QueueSource::getName() const { return name; } -void StorageS3QueueSource::lazyInitialize() -{ - if (initialized) - return; - - internal_source->lazyInitialize(processing_id); - reader = std::move(internal_source->reader); - if (reader) - reader_future = std::move(internal_source->reader_future); - initialized = true; -} - Chunk StorageS3QueueSource::generate() { - lazyInitialize(); - while (true) { if (!reader) break; - const auto * key_with_info = dynamic_cast(&reader.getKeyWithInfo()); + const auto * key_with_info = dynamic_cast(&reader.getObjectInfo()); auto file_status = key_with_info->processing_holder->getFileStatus(); if (isCancelled()) @@ -239,7 +220,7 @@ Chunk StorageS3QueueSource::generate() tryLogCurrentException(__PRETTY_FUNCTION__); } - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); } break; @@ -254,7 +235,7 @@ Chunk StorageS3QueueSource::generate() { LOG_DEBUG( log, "Table is being dropped, {} rows are already processed from {}, but file is not fully processed", - processed_rows_from_file, reader.getFile()); + processed_rows_from_file, reader.getRelativePath()); try { @@ -265,7 +246,7 @@ Chunk StorageS3QueueSource::generate() tryLogCurrentException(__PRETTY_FUNCTION__); } - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); /// Leave the file half processed. Table is being dropped, so we do not care. break; @@ -273,7 +254,7 @@ Chunk StorageS3QueueSource::generate() LOG_DEBUG(log, "Shutdown called, but file {} is partially processed ({} rows). 
" "Will process the file fully and then shutdown", - reader.getFile(), processed_rows_from_file); + reader.getRelativePath(), processed_rows_from_file); } auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); @@ -287,30 +268,30 @@ Chunk StorageS3QueueSource::generate() Chunk chunk; if (reader->pull(chunk)) { - LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getPath()); + LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getRelativePath()); file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getKeyWithInfo().info->size); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getRelativePath(), reader.getObjectInfo().metadata.size_bytes); return chunk; } } catch (...) { const auto message = getCurrentExceptionMessage(true); - LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getFile(), message); + LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getRelativePath(), message); files_metadata->setFileFailed(key_with_info->processing_holder, message); - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); throw; } files_metadata->setFileProcessed(key_with_info->processing_holder); - applyActionAfterProcessing(reader.getFile()); + applyActionAfterProcessing(reader.getRelativePath()); - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, true); + appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, true); file_status.reset(); processed_rows_from_file = 0; @@ -326,7 +307,7 @@ Chunk StorageS3QueueSource::generate() if (!reader) break; - file_status = files_metadata->getFileStatus(reader.getFile()); + file_status = files_metadata->getFileStatus(reader.getRelativePath()); /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 8fc7305ea08..fcf5c5c0160 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -5,7 +5,9 @@ #include #include #include -#include +#include +#include +#include #include @@ -14,28 +16,32 @@ namespace Poco { class Logger; } namespace DB { +struct ObjectMetadata; + class StorageS3QueueSource : public ISource, WithContext { public: - using IIterator = StorageS3Source::IIterator; - using KeyWithInfoPtr = StorageS3Source::KeyWithInfoPtr; - using GlobIterator = StorageS3Source::DisclosedGlobIterator; + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + + using ConfigurationPtr = Storage::ConfigurationPtr; + using GlobIterator = Source::GlobIterator; using ZooKeeperGetter = std::function; using RemoveFileFunc = std::function; using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr; using Metadata = S3QueueFilesMetadata; - struct S3QueueKeyWithInfo : public StorageS3Source::KeyWithInfo + struct S3QueueObjectInfo : public Source::ObjectInfo { - S3QueueKeyWithInfo( - const std::string & key_, - std::optional info_, - Metadata::ProcessingNodeHolderPtr processing_holder_); + S3QueueObjectInfo( + const std::string & key_, + const ObjectMetadata & object_metadata_, + Metadata::ProcessingNodeHolderPtr processing_holder_); Metadata::ProcessingNodeHolderPtr processing_holder; }; - class FileIterator : public IIterator + class FileIterator : public Source::IIterator { public: FileIterator( @@ -47,7 +53,7 @@ public: /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - KeyWithInfoPtr next(size_t idx) override; + Source::ObjectInfoPtr next(size_t processor) override; size_t estimatedKeysCount() override; @@ -60,14 +66,14 @@ public: const bool sharded_processing; const size_t current_shard; - std::unordered_map> sharded_keys; + std::unordered_map> sharded_keys; std::mutex sharded_keys_mutex; }; StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -80,8 +86,6 @@ public: const StorageID & storage_id_, LoggerPtr log_); - ~StorageS3QueueSource() override; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); String getName() const override; @@ -93,7 +97,7 @@ private: const S3QueueAction action; const size_t processing_id; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; + const std::shared_ptr internal_source; const NamesAndTypesList requested_virtual_columns; const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; @@ -103,13 +107,11 @@ private: RemoveFileFunc remove_file_func; LoggerPtr log; - using ReaderHolder = StorageS3Source::ReaderHolder; - ReaderHolder reader; - std::future reader_future; + Source::ReaderHolder reader; + std::future reader_future; std::atomic initialized{false}; size_t processed_rows_from_file = 0; - void lazyInitialize(); void applyActionAfterProcessing(const String & path); void appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); }; diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index 3ee2594135d..94816619aaa 
100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -7,7 +7,6 @@ #include #include #include -#include namespace DB @@ -33,7 +32,7 @@ namespace S3QueueTableMetadata::S3QueueTableMetadata( - const StorageS3::Configuration & configuration, + const StorageObjectStorageConfiguration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 30642869930..942ce7973ef 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,7 +3,7 @@ #if USE_AWS_S3 #include -#include +#include #include namespace DB @@ -27,7 +27,10 @@ struct S3QueueTableMetadata UInt64 s3queue_processing_threads_num; S3QueueTableMetadata() = default; - S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); + S3QueueTableMetadata( + const StorageObjectStorageConfiguration & configuration, + const S3QueueSettings & engine_settings, + const StorageInMemoryMetadata & storage_metadata); void read(const String & metadata_str); static S3QueueTableMetadata parse(const String & metadata_str); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 0723205b544..fa7132f705a 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -50,11 +51,6 @@ namespace ErrorCodes namespace { - bool containsGlobs(const S3::URI & url) - { - return url.key.find_first_of("*?{") != std::string::npos; - } - std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const S3QueueSettings & s3queue_settings) { std::string zk_path_prefix = settings.s3queue_default_zookeeper_path.value; @@ -98,7 +94,7 @@ namespace StorageS3Queue::StorageS3Queue( std::unique_ptr s3queue_settings_, - const StorageS3::Configuration & configuration_, + const ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -116,29 +112,29 @@ StorageS3Queue::StorageS3Queue( , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) , log(getLogger("StorageS3Queue (" + table_id_.table_name + ")")) { - if (configuration.url.key.empty()) + if (configuration->getPath().empty()) { - configuration.url.key = "/*"; + configuration->setPath("/*"); } - else if (configuration.url.key.ends_with('/')) + else if (configuration->getPath().ends_with('/')) { - configuration.url.key += '*'; + configuration->setPath(configuration->getPath() + '*'); } - else if (!containsGlobs(configuration.url)) + else if (!configuration->isPathWithGlobs()) { throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); } checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); - configuration.update(context_); - FormatFactory::instance().checkFormatName(configuration.format); - context_->getRemoteHostFilter().checkURL(configuration.url.uri); + object_storage = configuration->createOrUpdateObjectStorage(context_); + FormatFactory::instance().checkFormatName(configuration->format); + configuration->check(context_); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = 
StorageS3::getTableStructureFromDataImpl(configuration, format_settings, context_); + auto columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context_); storage_metadata.setColumns(columns); } else @@ -226,7 +222,7 @@ void StorageS3Queue::drop() bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context_, format_settings); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context_, format_settings); } class ReadFromS3Queue : public SourceStepWithFilter @@ -345,38 +341,20 @@ std::shared_ptr StorageS3Queue::createSource( size_t max_block_size, ContextPtr local_context) { - auto configuration_snapshot = updateConfigurationAndGetCopy(local_context); - - auto internal_source = std::make_unique( - info, configuration.format, getName(), local_context, format_settings, + auto internal_source = std::make_unique( + getName(), + object_storage, + configuration, + info, + format_settings, + local_context, max_block_size, - configuration_snapshot.request_settings, - configuration_snapshot.compression_method, - configuration_snapshot.client, - configuration_snapshot.url.bucket, - configuration_snapshot.url.version_id, - configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()), - file_iterator, local_context->getSettingsRef().max_download_threads, false); + file_iterator, + false); - auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client, blob_storage_log = BlobStorageLogWriter::create()](const std::string & path) mutable + auto file_deleter = [=, this](const std::string & path) mutable { - S3::DeleteObjectRequest request; - request.WithKey(path).WithBucket(bucket); - auto outcome = client->DeleteObject(request); - if (blob_storage_log) - blob_storage_log->addEvent( - BlobStorageLogElement::EventType::Delete, - bucket, path, {}, 0, outcome.IsSuccess() ? nullptr : &outcome.GetError()); - - if (!outcome.IsSuccess()) - { - const auto & err = outcome.GetError(); - LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); - } - else - { - LOG_TRACE(log, "Object with path {} was removed from S3", path); - } + object_storage->removeObject(StoredObject(path)); }; auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? 
local_context->getS3QueueLog() : nullptr; return std::make_shared( @@ -470,7 +448,6 @@ bool StorageS3Queue::streamToViews() auto s3queue_context = Context::createCopy(getContext()); s3queue_context->makeQueryContext(); - auto query_configuration = updateConfigurationAndGetCopy(s3queue_context); // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns @@ -505,12 +482,6 @@ bool StorageS3Queue::streamToViews() return rows > 0; } -StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(ContextPtr local_context) -{ - configuration.update(local_context); - return configuration; -} - zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const { return getContext()->getZooKeeper(); @@ -530,7 +501,7 @@ void StorageS3Queue::createOrCheckMetadata(const StorageInMemoryMetadata & stora } else { - std::string metadata = S3QueueTableMetadata(configuration, *s3queue_settings, storage_metadata).toString(); + std::string metadata = S3QueueTableMetadata(*configuration, *s3queue_settings, storage_metadata).toString(); requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); requests.emplace_back(zkutil::makeCreateRequest(zk_path / "processed", "", zkutil::CreateMode::Persistent)); requests.emplace_back(zkutil::makeCreateRequest(zk_path / "failed", "", zkutil::CreateMode::Persistent)); @@ -568,7 +539,7 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata"); auto metadata_from_zk = S3QueueTableMetadata::parse(metadata_str); - S3QueueTableMetadata old_metadata(configuration, *s3queue_settings, storage_metadata); + S3QueueTableMetadata old_metadata(*configuration, *s3queue_settings, storage_metadata); old_metadata.checkEquals(metadata_from_zk); auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns); @@ -584,14 +555,25 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const } } -std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) +std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr , const ActionsDAG::Node * predicate) { - auto glob_iterator = std::make_unique( - *configuration.client, configuration.url, predicate, virtual_columns, local_context, - /* read_keys */nullptr, configuration.request_settings); + auto glob_iterator = std::make_unique(object_storage, configuration, predicate, virtual_columns, nullptr); + return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } +static void initializeConfiguration( + StorageObjectStorageConfiguration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); +} + void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) { factory.registerStorage( @@ -602,7 +584,8 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) if (engine_args.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - auto configuration = StorageS3::getConfiguration(engine_args, 
args.getLocalContext()); + auto configuration = std::make_shared(); + initializeConfiguration(*configuration, args.engine_args, args.getContext(), false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index fd3b4bb4914..88f9bd65093 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -26,11 +26,13 @@ class S3QueueFilesMetadata; class StorageS3Queue : public IStorage, WithContext { public: - using Configuration = typename StorageS3::Configuration; + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + using ConfigurationPtr = Storage::ConfigurationPtr; StorageS3Queue( std::unique_ptr s3queue_settings_, - const Configuration & configuration_, + ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -53,8 +55,6 @@ public: NamesAndTypesList getVirtuals() const override { return virtual_columns; } - const auto & getFormatName() const { return configuration.format; } - const fs::path & getZooKeeperPath() const { return zk_path; } zkutil::ZooKeeperPtr getZooKeeper() const; @@ -68,7 +68,8 @@ private: const S3QueueAction after_processing; std::shared_ptr files_metadata; - Configuration configuration; + ConfigurationPtr configuration; + ObjectStoragePtr object_storage; const std::optional format_settings; NamesAndTypesList virtual_columns; @@ -103,7 +104,6 @@ private: void createOrCheckMetadata(const StorageInMemoryMetadata & storage_metadata); void checkTableStructure(const String & zookeeper_prefix, const StorageInMemoryMetadata & storage_metadata); - Configuration updateConfigurationAndGetCopy(ContextPtr local_context); }; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp deleted file mode 100644 index c09db0bfb7b..00000000000 --- a/src/Storages/StorageAzureBlob.cpp +++ /dev/null @@ -1,1478 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -using namespace Azure::Storage::Blobs; - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern const Metric ObjectStorageAzureThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; - -} - -namespace -{ - -const std::unordered_set required_configuration_keys = { - "blob_path", - "container", -}; - -const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "structure", - "compression_method", - "account_name", - "account_key", - "connection_string", - 
"storage_account_url", -}; - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); - configuration.blob_path = collection.get("blob_path"); - - if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); - - if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); - - configuration.structure = collection.getOrDefault("structure", "auto"); - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); -} - - -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, ContextPtr local_context) -{ - StorageAzureBlob::Configuration configuration; - - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; - } - - if (engine_args.size() < 3 || engine_args.size() > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage AzureBlobStorage requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool - { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); - }; - - if (engine_args.size() == 4) - { - //'c1 UInt64, c2 UInt64 - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = 
checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; -} - - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(ContextPtr local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - -void registerStorageAzureBlob(StorageFactory & factory) -{ - factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. 
- std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. - user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - auto settings = StorageAzureBlob::createSettings(args.getContext()); - - return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing */ false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::AZURE, - }); -} - -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - bool container_exists = containerExists(blob_service_client,configuration.container); - - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - blob_service_client = std::make_unique(configuration.connection_url); - } - - bool container_exists = 
containerExists(blob_service_client,configuration.container); - - std::string final_url; - size_t pos = configuration.connection_url.find('?'); - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - result = std::make_unique(final_url); - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - result = std::make_unique(final_url); - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - - -StorageAzureBlob::StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - ContextPtr context, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , name("AzureBlobStorage") - , configuration(configuration_) - , object_storage(std::move(object_storage_)) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing); - storage_metadata.setColumns(columns); - } - else - { - /// We don't allow special columns in File storage. 
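
/// A standalone sketch (illustrative) of the URL splicing above: when the endpoint
/// carries a SAS token ("...windows.net?sv=..."), the container name has to be
/// inserted before the query string; otherwise it is simply appended to the path.
#include <iostream>
#include <string>

static std::string containerUrl(const std::string & endpoint, const std::string & container)
{
    const size_t pos = endpoint.find('?');
    std::string base = (pos == std::string::npos) ? endpoint : endpoint.substr(0, pos);
    const std::string query = (pos == std::string::npos) ? "" : endpoint.substr(pos);
    if (!base.empty() && base.back() != '/')
        base += '/';
    return base + container + query;
}

int main()
{
    std::cout << containerUrl("https://acc.blob.core.windows.net?sv=2022", "cont") << '\n';
    /// https://acc.blob.core.windows.net/cont?sv=2022
}
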
- if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) -{ - if (configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", - configuration.blob_path); - } - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - object_storage->removeObjectsIfExist(objects); -} - -namespace -{ - -class StorageAzureBlobSink : public SinkToStorage -{ -public: - StorageAzureBlobSink( - const String & format, - const Block & sample_block_, - ContextPtr context, - std::optional format_settings_, - const CompressionMethod compression_method, - AzureObjectStorage * object_storage, - const String & blob_path) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - StoredObject object(blob_path); - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - object_storage->writeObject(object, WriteMode::Rewrite), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageAzureBlobSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
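
/// A sketch (illustrative) of the finalize/release contract implemented here: on the
/// happy path the format writer and the write buffer are finalized in order; after a
/// failure they must only be dropped without finalization, since finalizing a broken
/// writer would attempt to flush again.
#include <iostream>
#include <memory>
#include <stdexcept>

struct Writer
{
    bool broken = false;
    void write() { broken = true; throw std::runtime_error("write failed"); }
    void finalize() const
    {
        if (broken)
            throw std::logic_error("flushing a broken writer");
        std::cout << "flushed\n";
    }
};

int main()
{
    auto writer = std::make_unique<Writer>();
    try
    {
        writer->write();
        writer->finalize();  /// happy path only
    }
    catch (const std::runtime_error & e)
    {
        writer.reset();      /// release(): drop without finalizing
        std::cout << "released after: " << e.what() << '\n';
    }
}
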
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -class PartitionedStorageAzureBlobSink : public PartitionedSink -{ -public: - PartitionedStorageAzureBlobSink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - AzureObjectStorage * object_storage_, - const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - , object_storage(object_storage_) - , blob(blob_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_key = replaceWildcards(blob, partition_id); - validateKey(partition_key); - - return std::make_shared( - format, - sample_block, - context, - format_settings, - compression_method, - object_storage, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const ContextPtr context; - const CompressionMethod compression_method; - AzureObjectStorage * object_storage; - const String blob; - const std::optional format_settings; - - ExpressionActionsPtr partition_by_expr; - - static void validateKey(const String & str) - { - validatePartitionKey(str, true); - } -}; - -} - -class ReadFromAzureBlob : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromAzureBlob"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; - - ReadFromAzureBlob( - Block sample_block, - std::shared_ptr storage_, - ReadFromFormatInfo info_, - const bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , storage(std::move(storage_)) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , context(std::move(context_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - std::shared_ptr storage; - ReadFromFormatInfo info; - const bool need_only_count; - - ContextPtr context; - - size_t max_block_size; - const size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromAzureBlob::applyFilters() -{ - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageAzureBlob::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - if (partition_by && configuration.withWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet"); - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, 
supportsSubsetOfColumns(local_context), getVirtuals()); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - read_from_format_info.source_header, - std::move(this_ptr), - std::move(read_from_format_info), - need_only_count, - local_context, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - const auto & configuration = storage->configuration; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared(context, - context->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback()); - } - else - { - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - const auto & configuration = storage->configuration; - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - configuration.format, - getName(), - context, - storage->format_settings, - max_block_size, - configuration.compression_method, - storage->object_storage.get(), - configuration.container, - configuration.connection_url, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(configuration.blobs_paths.back(), configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr;
-    bool is_partitioned_implementation = partition_by_ast && configuration.withWildcard();
-
-    if (is_partitioned_implementation)
-    {
-        return std::make_shared<PartitionedStorageAzureBlobSink>(
-            partition_by_ast,
-            configuration.format,
-            sample_block,
-            local_context,
-            format_settings,
-            chosen_compression_method,
-            object_storage.get(),
-            configuration.blobs_paths.back());
-    }
-    else
-    {
-        if (configuration.withGlobs())
-            throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED,
-                "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path);
-
-        bool truncate_in_insert = local_context->getSettingsRef().azure_truncate_on_insert;
-
-        if (!truncate_in_insert && object_storage->exists(StoredObject(configuration.blob_path)))
-        {
-            if (local_context->getSettingsRef().azure_create_new_file_on_insert)
-            {
-                size_t index = configuration.blobs_paths.size();
-                const auto & first_key = configuration.blobs_paths[0];
-                auto pos = first_key.find_first_of('.');
-                String new_key;
-
-                do
-                {
-                    new_key = first_key.substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? "" : first_key.substr(pos));
-                    ++index;
-                }
-                while (object_storage->exists(StoredObject(new_key)));
-
-                configuration.blobs_paths.push_back(new_key);
-            }
-            else
-            {
-                throw Exception(
-                    ErrorCodes::BAD_ARGUMENTS,
-                    "Object in container {} with key {} already exists. "
-                    "If you want to overwrite it, enable setting azure_truncate_on_insert; if you "
-                    "want to create a new file on each insert, enable setting azure_create_new_file_on_insert",
-                    configuration.container, configuration.blobs_paths.back());
-            }
-        }
-
-        return std::make_shared<StorageAzureBlobSink>(
-            configuration.format,
-            sample_block,
-            local_context,
-            format_settings,
-            chosen_compression_method,
-            object_storage.get(),
-            configuration.blobs_paths.back());
-    }
-}
-
-NamesAndTypesList StorageAzureBlob::getVirtuals() const
-{
-    return virtual_columns;
-}
-
-Names StorageAzureBlob::getVirtualColumnNames()
-{
-    return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames();
-}
-
-bool StorageAzureBlob::supportsPartitionBy() const
-{
-    return true;
-}
-
-bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const
-{
-    return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings);
-}
-
-bool StorageAzureBlob::prefersLargeBlocks() const
-{
-    return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format);
-}
-
-bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const
-{
-    return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context);
-}
-
-StorageAzureBlobSource::GlobIterator::GlobIterator(
-    AzureObjectStorage * object_storage_,
-    const std::string & container_,
-    String blob_path_with_globs_,
-    const ActionsDAG::Node * predicate,
-    const NamesAndTypesList & virtual_columns_,
-    ContextPtr context_,
-    RelativePathsWithMetadata * outer_blobs_,
-    std::function<void(FileProgress)> file_progress_callback_)
-    : IIterator(context_)
-    , object_storage(object_storage_)
-    , container(container_)
-    , blob_path_with_globs(blob_path_with_globs_)
-    , virtual_columns(virtual_columns_)
-    , outer_blobs(outer_blobs_)
-    , file_progress_callback(file_progress_callback_)
-{
-    const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{"));
-
-    /// We don't have to list the container, because there are no asterisks.
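
/// A sketch (illustrative) of the shortcut below: the prefix up to the first glob
/// character equals the whole path exactly when there are no globs, in which case a
/// single metadata request replaces listing the container.
#include <iostream>
#include <string>

static bool needsListing(const std::string & blob_path)
{
    const std::string key_prefix = blob_path.substr(0, blob_path.find_first_of("*?{"));
    return key_prefix.size() != blob_path.size();
}

int main()
{
    std::cout << needsListing("dir/data.csv") << '\n'; /// 0: fetch the object directly
    std::cout << needsListing("dir/*.csv") << '\n';    /// 1: list with prefix "dir/"
}
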
- if (key_prefix.size() == blob_path_with_globs.size()) - { - auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); - blobs_with_metadata.emplace_back( - blob_path_with_globs, - object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata.back()); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - is_finished = true; - return; - } - - object_storage_iterator = object_storage->iterate(key_prefix); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(blob_path_with_globs)); - - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error()); - - recursive = blob_path_with_globs == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); -} - -RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() -{ - std::lock_guard lock(next_mutex); - - if (is_finished && index >= blobs_with_metadata.size()) - { - return {}; - } - - bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); - - if (need_new_batch) - { - RelativePathsWithMetadata new_batch; - while (new_batch.empty()) - { - auto result = object_storage_iterator->getCurrrentBatchAndScheduleNext(); - if (result.has_value()) - { - new_batch = result.value(); - } - else - { - is_finished = true; - return {}; - } - - for (auto it = new_batch.begin(); it != new_batch.end();) - { - if (!recursive && !re2::RE2::FullMatch(it->relative_path, *matcher)) - it = new_batch.erase(it); - else - ++it; - } - } - - index = 0; - - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (auto & path_with_metadata : new_batch) - paths.push_back(fs::path(container) / path_with_metadata.relative_path); - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - } - - if (outer_blobs) - outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - - blobs_with_metadata = std::move(new_batch); - if (file_progress_callback) - { - for (const auto & [relative_path, info] : blobs_with_metadata) - { - file_progress_callback(FileProgress(0, info.size_bytes)); - } - } - } - - size_t current_index = index++; - if (current_index >= blobs_with_metadata.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return blobs_with_metadata[current_index]; -} - -StorageAzureBlobSource::KeysIterator::KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , virtual_columns(virtual_columns_) -{ - Strings all_keys = keys_; - - ASTPtr filter_ast; - if (!all_keys.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - Strings paths; - paths.reserve(all_keys.size()); - for (const auto & key : all_keys) - paths.push_back(fs::path(container) / key); - - VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext()); - } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); 
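
/// A sketch (illustrative) of the glob matching used by GlobIterator: the blob
/// path's globs are compiled into a regular expression and every listed key is
/// matched in full. This toy translation covers "*", "?" and "{a,b}" only; the real
/// makeRegexpPatternFromGlobs also handles {N..M} ranges and escaping, and the
/// storage uses re2 rather than std::regex.
#include <iostream>
#include <regex>
#include <string>

static std::string globToRegex(const std::string & glob)
{
    std::string out;
    for (char c : glob)
    {
        switch (c)
        {
            case '*': out += "[^/]*"; break; /// '*' does not cross path separators
            case '?': out += "[^/]"; break;
            case '{': out += "("; break;
            case '}': out += ")"; break;
            case ',': out += "|"; break;
            case '.': out += "\\."; break;
            default: out += c;
        }
    }
    return out;
}

int main()
{
    const std::regex matcher(globToRegex("dir/{a,b}/*.csv"));
    std::cout << std::regex_match("dir/a/x.csv", matcher) << '\n'; /// 1
    std::cout << std::regex_match("dir/c/x.csv", matcher) << '\n'; /// 0
}
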
- if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - keys.emplace_back(key, object_metadata); - } - - if (outer_blobs) - *outer_blobs = keys; -} - -RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - return keys[current_index]; -} - -Chunk StorageAzureBlobSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - requested_virtual_columns, - fs::path(container) / reader.getRelativePath(), - reader.getRelativePathWithMetadata().metadata.size_bytes); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageAzureBlobSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(connection_url) / container / path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageAzureBlobSource::tryGetNumRowsFromCache(const DB::RelativePathWithMetadata & path_with_metadata) -{ - String source = fs::path(connection_url) / container / path_with_metadata.relative_path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = path_with_metadata.metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - return std::nullopt; - }; - - return StorageAzureBlob::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -StorageAzureBlobSource::StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - ContextPtr context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , format(format_) - , name(std::move(name_)) - , sample_block(info.format_header) - , format_settings(format_settings_) - , columns_desc(info.columns_description) - , max_block_size(max_block_size_) - , compression_hint(compression_hint_) - , object_storage(std::move(object_storage_)) - , container(container_) - , connection_url(connection_url_) - , file_iterator(file_iterator_) - , 
need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "AzureReader")) -{ - reader = createReader(); - if (reader) - reader_future = createReaderAsync(); -} - - -StorageAzureBlobSource::~StorageAzureBlobSource() -{ - create_reader_pool.wait(); -} - -String StorageAzureBlobSource::getName() const -{ - return name; -} - -StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() -{ - auto path_with_metadata = file_iterator->next(); - if (path_with_metadata.relative_path.empty()) - return {}; - - if (path_with_metadata.metadata.size_bytes == 0) - path_with_metadata.metadata = object_storage->getObjectMetadata(path_with_metadata.relative_path); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(path_with_metadata) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - auto compression_method = chooseCompressionMethod(path_with_metadata.relative_path, compression_hint); - read_buf = createAzureReadBuffer(path_with_metadata.relative_path, path_with_metadata.metadata.size_bytes); - auto input_format = FormatFactory::instance().getInput( - format, *read_buf, sample_block, getContext(), max_block_size, - format_settings, max_parsing_threads, std::nullopt, - /* is_remote_fs */ true, compression_method); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. 
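
/// A sketch (illustrative) of the cached-count short cut described above: rather
/// than one chunk carrying the whole row count, chunks of at most max_block_size
/// rows are generated until the cached total is exhausted, which is what
/// ConstChunkGenerator does along these lines.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

static std::vector<size_t> chunkSizes(size_t total_rows, size_t max_block_size)
{
    std::vector<size_t> sizes;
    while (total_rows > 0)
    {
        const size_t n = std::min(total_rows, max_block_size);
        sizes.push_back(n);
        total_rows -= n;
    }
    return sizes;
}

int main()
{
    for (size_t n : chunkSizes(250000, 65409))
        std::cout << n << '\n'; /// 65409, 65409, 65409, 53773
}
/// (The transform added below then projects the requested columns/subcolumns.)
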
- builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{path_with_metadata, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageAzureBlobSource::createReaderAsync() -{ - return create_reader_scheduler([this] { return createReader(); }, Priority{}); -} - -std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. - if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from Azure with initial prefetch", object_size); - return createAsyncAzureReadBuffer(key, read_settings, object_size); - } - - return object_storage->readObject(StoredObject(key), read_settings, {}, object_size); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::shared_ptr & file_iterator_, - AzureObjectStorage * object_storage_, - const StorageAzureBlob::Configuration & configuration_, - const std::optional & format_settings_, - const RelativePathsWithMetadata & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , object_storage(object_storage_) - , configuration(configuration_) - , format_settings(format_settings_) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } - - std::pair, std::optional> next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; - } - - current_path_with_metadata = file_iterator->next(); - - if (current_path_with_metadata.relative_path.empty()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in AzureBlobStorage. You must specify table structure manually", configuration.format); - - return {nullptr, std::nullopt}; - } - - first = false; - - /// AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. 
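
/// A sketch (illustrative) of the buffer choice in createAzureReadBuffer above:
/// objects smaller than two download buffers get an asynchronous reader with an
/// eager initial prefetch, larger objects a plain ranged reader that benefits from
/// parallel reads. The 10 MiB value for max_download_buffer_size is an assumption
/// here.
#include <cstddef>
#include <iostream>

enum class ReadStrategy { AsyncWithPrefetch, PlainRanged };

static ReadStrategy chooseStrategy(size_t object_size, size_t download_buffer_size, bool threadpool_read)
{
    const bool object_too_small = object_size <= 2 * download_buffer_size;
    return (object_too_small && threadpool_read) ? ReadStrategy::AsyncWithPrefetch
                                                 : ReadStrategy::PlainRanged;
}

int main()
{
    const size_t mib = 1 << 20;
    std::cout << (chooseStrategy(1 * mib, 10 * mib, true) == ReadStrategy::AsyncWithPrefetch) << '\n';   /// 1
    std::cout << (chooseStrategy(100 * mib, 10 * mib, true) == ReadStrategy::AsyncWithPrefetch) << '\n'; /// 0
}
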
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; - } - else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - RelativePathsWithMetadata paths = {current_path_with_metadata}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache}; - } - - first = false; - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return {wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - String getLastFileName() const override { return current_path_with_metadata.relative_path; } - - private: - std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) - { - auto & schema_cache = StorageAzureBlob::getSchemaCache(getContext()); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] -> std::optional - { - if (it->metadata.last_modified) - return it->metadata.last_modified->epochTime(); - return std::nullopt; - }; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - String source = 
host_and_bucket + '/' + it->relative_path; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - AzureObjectStorage * object_storage; - const StorageAzureBlob::Configuration & configuration; - const std::optional & format_settings; - const RelativePathsWithMetadata & read_keys; - size_t prev_read_keys_size; - RelativePathWithMetadata current_path_with_metadata; - bool first = true; - }; -} - -ColumnsDescription StorageAzureBlob::getTableStructureFromData( - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx, - bool distributed_processing) -{ - RelativePathsWithMetadata read_keys; - std::shared_ptr file_iterator; - if (distributed_processing) - { - file_iterator = std::make_shared(ctx, - ctx->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - else - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, configuration, format_settings, read_keys, ctx); - return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); -} - -SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_azure", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - - -std::unique_ptr StorageAzureBlobSource::createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto modified_settings{read_settings}; - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto async_reader = object_storage->readObjects(StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, modified_settings); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -} - -#endif diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h deleted file mode 100644 index 6fc3c5ce592..00000000000 --- a/src/Storages/StorageAzureBlob.h +++ /dev/null @@ -1,339 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class StorageAzureBlob : public IStorage -{ -public: - - using AzureClient = Azure::Storage::Blobs::BlobContainerClient; - using AzureClientPtr = std::unique_ptr; - - struct Configuration : public StatelessTableEngineConfiguration - { - Configuration() = default; - - String getPath() const { return blob_path; } - - bool update(ContextPtr context); - - void connect(ContextPtr context); - - bool withGlobs() const { return blob_path.find_first_of("*?{") != std::string::npos; } - - bool withWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return blobs_paths.back().find(PARTITION_ID_WILDCARD) != String::npos; - } 
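
/// A standalone restatement (illustrative) of the two predicates above: withGlobs()
/// detects glob metacharacters in the blob path, withWildcard() the {_partition_id}
/// placeholder used for PARTITION BY writes. Note that a partition wildcard is also
/// "with globs", which is why the write path checks the partitioned case first.
#include <iostream>
#include <string>

static bool withGlobs(const std::string & blob_path)
{
    return blob_path.find_first_of("*?{") != std::string::npos;
}

static bool withWildcard(const std::string & last_blob_path)
{
    static const std::string PARTITION_ID_WILDCARD = "{_partition_id}";
    return last_blob_path.find(PARTITION_ID_WILDCARD) != std::string::npos;
}

int main()
{
    std::cout << withGlobs("dir/*.csv") << withWildcard("out_{_partition_id}.csv") << '\n'; /// 11
}
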
- - Poco::URI getConnectionURL() const; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; - std::string blob_path; - std::vector blobs_paths; - }; - - StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_); - - static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); - - static AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); - - static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); - - String getName() const override - { - return name; - } - - void read( - QueryPlan & query_plan, - const Names &, - const StorageSnapshotPtr &, - SelectQueryInfo &, - ContextPtr, - QueryProcessingStage::Enum, - size_t, - size_t) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /* metadata_snapshot */, ContextPtr context, bool /*async_insert*/) override; - - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - - bool supportsPartitionBy() const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsSubsetOfColumns(const ContextPtr & context) const; - - bool supportsTrivialCountOptimization() const override { return true; } - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - static ColumnsDescription getTableStructureFromData( - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx, - bool distributed_processing = false); - -private: - friend class ReadFromAzureBlob; - - std::string name; - Configuration configuration; - std::unique_ptr object_storage; - NamesAndTypesList virtual_columns; - - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; -}; - -class StorageAzureBlobSource : public ISource, WithContext -{ -public: - class IIterator : public WithContext - { - public: - IIterator(ContextPtr context_):WithContext(context_) {} - virtual ~IIterator() = default; - virtual RelativePathWithMetadata next() = 0; - - RelativePathWithMetadata operator ()() { return next(); } - }; - - class GlobIterator : public IIterator - { - public: - GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_ = {}); - - RelativePathWithMetadata next() override; - ~GlobIterator() override = default; - - private: - AzureObjectStorage * object_storage; - std::string container; - String blob_path_with_globs; - ActionsDAGPtr filter_dag; - 
NamesAndTypesList virtual_columns; - - size_t index = 0; - - RelativePathsWithMetadata blobs_with_metadata; - RelativePathsWithMetadata * outer_blobs; - ObjectStorageIteratorPtr object_storage_iterator; - bool recursive{false}; - - std::unique_ptr matcher; - - void createFilterAST(const String & any_key); - bool is_finished = false; - std::mutex next_mutex; - - std::function file_progress_callback; - }; - - class ReadIterator : public IIterator - { - public: - explicit ReadIterator(ContextPtr context_, - const ReadTaskCallback & callback_) - : IIterator(context_), callback(callback_) { } - RelativePathWithMetadata next() override - { - return {callback(), {}}; - } - - private: - ReadTaskCallback callback; - }; - - class KeysIterator : public IIterator - { - public: - KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback = {}); - - RelativePathWithMetadata next() override; - ~KeysIterator() override = default; - - private: - AzureObjectStorage * object_storage; - std::string container; - RelativePathsWithMetadata keys; - - ActionsDAGPtr filter_dag; - NamesAndTypesList virtual_columns; - - std::atomic index = 0; - }; - - StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - ContextPtr context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_); - ~StorageAzureBlobSource() override; - - Chunk generate() override; - - String getName() const override; - -private: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const RelativePathWithMetadata & path_with_metadata); - - NamesAndTypesList requested_columns; - NamesAndTypesList requested_virtual_columns; - String format; - String name; - Block sample_block; - std::optional format_settings; - ColumnsDescription columns_desc; - UInt64 max_block_size; - String compression_hint; - AzureObjectStorage * object_storage; - String container; - String connection_url; - std::shared_ptr file_iterator; - bool need_only_count; - size_t total_rows_in_file = 0; - - struct ReaderHolder - { - public: - ReaderHolder( - RelativePathWithMetadata relative_path_with_metadata_, - std::unique_ptr read_buf_, - std::shared_ptr source_, - std::unique_ptr pipeline_, - std::unique_ptr reader_) - : relative_path_with_metadata(std::move(relative_path_with_metadata_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } - - ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; - - ReaderHolder(ReaderHolder && other) noexcept - { - *this = std::move(other); - } - - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. - /// reader uses pipeline, pipeline uses read_buf. 
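
/// A sketch (illustrative) of why ReaderHolder's member order matters: C++ destroys
/// members in reverse declaration order, so declaring read_buf before pipeline and
/// pipeline before reader guarantees the executor dies first and the buffer last;
/// the move assignment below mirrors the same order by hand.
#include <iostream>
#include <memory>
#include <string>

struct Traced
{
    std::string name;
    explicit Traced(std::string n) : name(std::move(n)) { }
    ~Traced() { std::cout << "destroy " << name << '\n'; }
};

struct Holder
{
    std::unique_ptr<Traced> read_buf = std::make_unique<Traced>("read_buf");
    std::unique_ptr<Traced> pipeline = std::make_unique<Traced>("pipeline");
    std::unique_ptr<Traced> reader = std::make_unique<Traced>("reader");
};

int main()
{
    Holder holder; /// prints: destroy reader, destroy pipeline, destroy read_buf
}
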
- reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - relative_path_with_metadata = std::move(other.relative_path_with_metadata); - return *this; - } - - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getRelativePath() const { return relative_path_with_metadata.relative_path; } - const RelativePathWithMetadata & getRelativePathWithMetadata() const { return relative_path_with_metadata; } - const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } - - private: - RelativePathWithMetadata relative_path_with_metadata; - std::unique_ptr read_buf; - std::shared_ptr source; - std::unique_ptr pipeline; - std::unique_ptr reader; - }; - - ReaderHolder reader; - - LoggerPtr log = getLogger("StorageAzureBlobSource"); - - ThreadPool create_reader_pool; - ThreadPoolCallbackRunner create_reader_scheduler; - std::future reader_future; - - /// Recreate ReadBuffer and Pipeline for each file. - ReaderHolder createReader(); - std::future createReaderAsync(); - - std::unique_ptr createAzureReadBuffer(const String & key, size_t object_size); - std::unique_ptr createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size); -}; - -} - -#endif diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp deleted file mode 100644 index 1d587512f38..00000000000 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include "Storages/StorageAzureBlobCluster.h" - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageAzureBlobCluster::StorageAzureBlobCluster( - const String & cluster_name_, - const StorageAzureBlob::Configuration & configuration_, - std::unique_ptr && object_storage_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) - , configuration{configuration_} - , object_storage(std::move(object_storage_)) -{ - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function - auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false); - storage_metadata.setColumns(columns); - } - else - storage_metadata.setColumns(columns_); - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) -{ - ASTExpressionList 
* expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - - TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context); -} - -RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared( - object_storage.get(), configuration.container, configuration.blob_path, - predicate, virtual_columns, context, nullptr); - auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - -NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const -{ - return virtual_columns; -} - - -} - -#endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h deleted file mode 100644 index 2831b94f825..00000000000 --- a/src/Storages/StorageAzureBlobCluster.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -#include "Client/Connection.h" -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageAzureBlobCluster : public IStorageCluster -{ -public: - StorageAzureBlobCluster( - const String & cluster_name_, - const StorageAzureBlob::Configuration & configuration_, - std::unique_ptr && object_storage_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); - - std::string getName() const override { return "AzureBlobStorageCluster"; } - - NamesAndTypesList getVirtuals() const override; - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization() const override { return true; } - -private: - void updateBeforeRead(const ContextPtr & /*context*/) override {} - - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; - - StorageAzureBlob::Configuration configuration; - NamesAndTypesList virtual_columns; - std::unique_ptr object_storage; -}; - - -} - -#endif diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp deleted file mode 100644 index 4fde6fd04f3..00000000000 --- a/src/Storages/StorageS3.cpp +++ /dev/null @@ -1,1905 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - -namespace fs = std::filesystem; - - 
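
/// A sketch (illustrative) of the task-iterator extension defined above for the
/// cluster storage: the initiator wraps a shared file iterator into a callback and
/// every worker pulls its next relative path from it until an empty string signals
/// exhaustion.
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

int main()
{
    auto files = std::make_shared<std::vector<std::string>>(
        std::vector<std::string>{"a.csv", "b.csv", "c.csv"});
    auto pos = std::make_shared<size_t>(0);
    auto mutex = std::make_shared<std::mutex>();

    std::function<std::string()> next_task = [=]() -> std::string
    {
        std::lock_guard lock(*mutex);
        return *pos < files->size() ? (*files)[(*pos)++] : "";
    };

    for (std::string task = next_task(); !task.empty(); task = next_task())
        std::cout << "dispatch " << task << '\n';
}
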
-namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; - extern const Metric StorageS3ThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -static const std::unordered_set required_configuration_keys = { - "url", -}; -static const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "compression_method", - "structure", - "access_key_id", - "secret_access_key", - "session_token", - "filename", - "use_environment_credentials", - "max_single_read_retries", - "min_upload_part_size", - "upload_part_size_multiply_factor", - "upload_part_size_multiply_parts_count_threshold", - "max_single_part_upload_size", - "max_connections", - "expiration_window_seconds", - "no_sign_request" -}; - -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_TEXT; - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int S3_ERROR; - extern const int UNEXPECTED_EXPRESSION; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int FILE_DOESNT_EXIST; -} - - -class ReadFromStorageS3Step : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromStorageS3Step"; } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void applyFilters() override; - - ReadFromStorageS3Step( - Block sample_block, - const Names & column_names_, - StorageSnapshotPtr storage_snapshot_, - StorageS3 & storage_, - ReadFromFormatInfo read_from_format_info_, - bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , column_names(column_names_) - , storage_snapshot(std::move(storage_snapshot_)) - , storage(storage_) - , read_from_format_info(std::move(read_from_format_info_)) - , need_only_count(need_only_count_) - , local_context(std::move(context_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - query_configuration = storage.updateConfigurationAndGetCopy(local_context); - virtual_columns = storage.getVirtuals(); - } - -private: - Names column_names; - StorageSnapshotPtr storage_snapshot; - StorageS3 & storage; - ReadFromFormatInfo read_from_format_info; - bool need_only_count; - StorageS3::Configuration query_configuration; - NamesAndTypesList virtual_columns; - - ContextPtr local_context; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - - -class IOutputFormat; -using OutputFormatPtr = std::shared_ptr; - -class StorageS3Source::DisclosedGlobIterator::Impl : WithContext -{ -public: - Impl( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : WithContext(context_) - , client(client_.clone()) - , globbed_uri(globbed_uri_) - , virtual_columns(virtual_columns_) - , read_keys(read_keys_) - , request_settings(request_settings_) - , 
list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) - , file_progress_callback(file_progress_callback_) - { - if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - - const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. - if (key_prefix.size() == globbed_uri.key.size()) - { - buffer.emplace_back(std::make_shared(globbed_uri.key, std::nullopt)); - buffer_iter = buffer.begin(); - is_finished = true; - return; - } - - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error()); - - recursive = globbed_uri.key == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); - } - - KeyWithInfoPtr next(size_t) - { - std::lock_guard lock(mutex); - return nextAssumeLocked(); - } - - size_t objectsCount() - { - return buffer.size(); - } - - ~Impl() - { - list_objects_pool.wait(); - } - -private: - using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; - - KeyWithInfoPtr nextAssumeLocked() - { - if (buffer_iter != buffer.end()) - { - auto answer = *buffer_iter; - ++buffer_iter; - - /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. - /// So we get object info lazily here on 'next()' request. - if (!answer->info) - { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); - if (file_progress_callback) - file_progress_callback(FileProgress(0, answer->info->size)); - } - - return answer; - } - - if (is_finished) - return {}; - - try - { - fillInternalBufferAssumeLocked(); - } - catch (...) - { - /// In case of exception thrown while listing new batch of files - /// iterator may be partially initialized and its further using may lead to UB. - /// Iterator is used by several processors from several threads and - /// it may take some time for threads to stop processors and they - /// may still use this iterator after exception is thrown. - /// To avoid this UB, reset the buffer and return defaults for further calls. 
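
/// A sketch (illustrative) of the reset-on-exception pattern described above: if
/// refilling the buffer fails, the iterator flips into a terminal state before
/// rethrowing, so later next() calls see a consistent, empty iterator rather than a
/// half-initialized buffer.
#include <iostream>
#include <stdexcept>
#include <vector>

struct SafeIterator
{
    std::vector<int> buffer;
    size_t pos = 0;
    bool is_finished = false;

    int next()
    {
        if (pos < buffer.size())
            return buffer[pos++];
        if (is_finished)
            return -1; /// the "no more data" default
        try
        {
            refill();
        }
        catch (...)
        {
            is_finished = true; /// terminal state first...
            buffer.clear();
            pos = 0;
            throw;              /// ...then propagate
        }
        return next();
    }

    static void refill() { throw std::runtime_error("listing failed"); }
};

int main()
{
    SafeIterator it;
    try { it.next(); } catch (const std::exception & e) { std::cout << e.what() << '\n'; }
    std::cout << it.next() << '\n'; /// -1: still safe after the failure
}
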
- is_finished = true; - buffer.clear(); - buffer_iter = buffer.begin(); - throw; - } - - return nextAssumeLocked(); - } - - void fillInternalBufferAssumeLocked() - { - buffer.clear(); - assert(outcome_future.valid()); - auto outcome = outcome_future.get(); - - if (!outcome.IsSuccess()) - { - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - const auto & result_batch = outcome.GetResult().GetContents(); - - /// It returns false when all objects were returned - is_finished = !outcome.GetResult().GetIsTruncated(); - - if (!is_finished) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - list_objects_pool.wait(); - outcome_future = listObjectsAsync(); - } - - if (request_settings.throw_on_zero_files_match && result_batch.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); - - KeysWithInfo temp_buffer; - temp_buffer.reserve(result_batch.size()); - - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - S3::ObjectInfo info = - { - .size = size_t(row.GetSize()), - .last_modification_time = row.GetLastModified().Millis() / 1000, - }; - - temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); - } - } - - if (temp_buffer.empty()) - { - buffer_iter = buffer.begin(); - return; - } - - if (filter_dag) - { - std::vector paths; - paths.reserve(temp_buffer.size()); - for (const auto & key_with_info : temp_buffer) - paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); - - VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); - } - - buffer = std::move(temp_buffer); - - if (file_progress_callback) - { - for (const auto & key_with_info : buffer) - file_progress_callback(FileProgress(0, key_with_info->info->size)); - } - - /// Set iterator only after the whole batch is processed - buffer_iter = buffer.begin(); - - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - } - - std::future listObjectsAsync() - { - return list_objects_scheduler([this] - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); - - /// Outcome failure will be handled on the caller side. 
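
/// A sketch (illustrative) of the ListObjectsV2 paging protocol used below: each
/// response reports whether it was truncated and carries a continuation token for
/// the next request; listing stops once a page is not truncated.
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct Page
{
    std::vector<std::string> keys;
    bool truncated = false;
    std::string next_token;
};

static Page listPage(const std::optional<std::string> & token)
{
    if (!token)
        return {{"k1", "k2"}, true, "t1"};
    return {{"k3"}, false, ""};
}

int main()
{
    std::optional<std::string> token;
    bool finished = false;
    while (!finished)
    {
        const Page page = listPage(token);
        for (const auto & key : page.keys)
            std::cout << key << '\n';
        finished = !page.truncated; /// mirrors is_finished = !GetIsTruncated()
        token = page.next_token;    /// mirrors SetContinuationToken(...)
    }
}
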
- if (outcome.IsSuccess()) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return outcome; - }, Priority{}); - } - - std::mutex mutex; - - KeysWithInfo buffer; - KeysWithInfo::iterator buffer_iter; - - std::unique_ptr client; - S3::URI globbed_uri; - ASTPtr query; - NamesAndTypesList virtual_columns; - ActionsDAGPtr filter_dag; - std::unique_ptr matcher; - bool recursive{false}; - bool is_finished{false}; - KeysWithInfo * read_keys; - - S3::ListObjectsV2Request request; - S3Settings::RequestSettings request_settings; - - ThreadPool list_objects_pool; - ThreadPoolCallbackRunner list_objects_scheduler; - std::future outcome_future; - std::function file_progress_callback; -}; - -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : pimpl(std::make_shared(client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -class StorageS3Source::KeysIterator::Impl -{ -public: - explicit Impl( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys_, - std::function file_progress_callback_) - : keys(keys_) - , client(client_.clone()) - , version_id(version_id_) - , bucket(bucket_) - , request_settings(request_settings_) - , file_progress_callback(file_progress_callback_) - { - if (read_keys_) - { - for (const auto & key : keys) - read_keys_->push_back(std::make_shared(key)); - } - } - - KeyWithInfoPtr next(size_t) - { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - auto key = keys[current_index]; - std::optional info; - if (file_progress_callback) - { - info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); - file_progress_callback(FileProgress(0, info->size)); - } - - return std::make_shared(key, info); - } - - size_t objectsCount() - { - return keys.size(); - } - -private: - Strings keys; - std::atomic_size_t index = 0; - std::unique_ptr client; - String version_id; - String bucket; - S3Settings::RequestSettings request_settings; - std::function file_progress_callback; -}; - -StorageS3Source::KeysIterator::KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, request_settings_, - read_keys, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::KeysIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -StorageS3Source::ReadTaskIterator::ReadTaskIterator( - const DB::ReadTaskCallback & callback_, - 
size_t max_threads_count) - : callback(callback_) -{ - ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunner(pool, "S3ReadTaskItr"); - - std::vector> keys; - keys.reserve(max_threads_count); - for (size_t i = 0; i < max_threads_count; ++i) - keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); - - pool.wait(); - buffer.reserve(max_threads_count); - for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= buffer.size()) - return std::make_shared(callback()); - - return buffer[current_index]; -} - -size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() -{ - return buffer.size(); -} - -StorageS3Source::StorageS3Source( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - ContextPtr context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket_, - const String & version_id_, - const String & url_host_and_port_, - std::shared_ptr file_iterator_, - const size_t max_parsing_threads_, - bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) - , WithContext(context_) - , name(std::move(name_)) - , bucket(bucket_) - , version_id(version_id_) - , url_host_and_port(url_host_and_port_) - , format(format_) - , columns_desc(info.columns_description) - , requested_columns(info.requested_columns) - , max_block_size(max_block_size_) - , request_settings(request_settings_) - , compression_hint(std::move(compression_hint_)) - , client(client_) - , sample_block(info.format_header) - , format_settings(format_settings_) - , requested_virtual_columns(info.requested_virtual_columns) - , file_iterator(file_iterator_) - , max_parsing_threads(max_parsing_threads_) - , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) -{ -} - -void StorageS3Source::lazyInitialize(size_t idx) -{ - if (initialized) - return; - - reader = createReader(idx); - if (reader) - reader_future = createReaderAsync(idx); - initialized = true; -} - -StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) -{ - KeyWithInfoPtr key_with_info; - do - { - key_with_info = file_iterator->next(idx); - if (!key_with_info || key_with_info->key.empty()) - return {}; - - if (!key_with_info->info) - key_with_info->info = S3::getObjectInfo(*client, bucket, key_with_info->key, version_id, request_settings); - } - while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info->info->size == 0); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? 
tryGetNumRowsFromCache(*key_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - auto compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); - read_buf = createS3ReadBuffer(key_with_info->key, key_with_info->info->size); - - auto input_format = FormatFactory::instance().getInput( - format, - *read_buf, - sample_block, - getContext(), - max_block_size, - format_settings, - max_parsing_threads, - /* max_download_threads= */ std::nullopt, - /* is_remote_fs */ true, - compression_method, - need_only_count); - - if (key_condition) - input_format->setKeyCondition(key_condition); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageS3Source::createReaderAsync(size_t idx) -{ - return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); -} - -std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
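The decision being made here fits in a few lines on its own. A sketch of the heuristic with illustrative names; the 2x factor mirrors the object_too_small check below:

#include <cstddef>

enum class S3ReadStrategy
{
    PrefetchedAsync,  /// One buffer with eager prefetch: good for tiny objects.
    PlainSeekable     /// Seekable reads, parallel-friendly: good for big objects.
};

/// Objects that fit into roughly two download buffers are cheaper to read
/// with a single prefetched buffer; larger objects benefit from seekable
/// and parallel reads.
S3ReadStrategy chooseReadStrategy(size_t object_size, size_t download_buffer_size, bool threadpool_read)
{
    const bool object_too_small = object_size <= 2 * download_buffer_size;
    if (object_too_small && threadpool_read)
        return S3ReadStrategy::PrefetchedAsync;
    return S3ReadStrategy::PlainSeekable;
}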
- if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return createAsyncS3ReadBuffer(key, read_settings, object_size); - } - - return std::make_unique( - client, bucket, key, version_id, request_settings, read_settings, - /*use_external_buffer*/ false, /*offset_*/ 0, /*read_until_position_*/ 0, - /*restricted_seek_*/ false, object_size); -} - -std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto context = getContext(); - auto read_buffer_creator = - [this, read_settings, object_size] - (const std::string & path, size_t read_until_position) -> std::unique_ptr - { - return std::make_unique( - client, - bucket, - path, - version_id, - request_settings, - read_settings, - /* use_external_buffer */true, - /* offset */0, - read_until_position, - /* restricted_seek */true, - object_size); - }; - - auto s3_impl = std::make_unique( - std::move(read_buffer_creator), - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, - read_settings, - /* cache_log */nullptr, /* use_external_buffer */true); - - auto modified_settings{read_settings}; - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - auto async_reader = std::make_unique( - std::move(s3_impl), pool_reader, modified_settings, - context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -StorageS3Source::~StorageS3Source() -{ - create_reader_pool.wait(); -} - -String StorageS3Source::getName() const -{ - return name; -} - -Chunk StorageS3Source::generate() -{ - lazyInitialize(); - - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize()); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getFile(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
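The loop below overlaps draining the current reader with constructing the next one. The same prefetch pattern, reduced to a self-contained sketch with std::async standing in for the reader thread pool (Prefetcher is a hypothetical name):

#include <functional>
#include <future>
#include <optional>

/// While the caller consumes the current item, the next one is already
/// being produced on a side thread.
template <typename T>
class Prefetcher
{
public:
    explicit Prefetcher(std::function<std::optional<T>()> produce_)
        : produce(std::move(produce_))
        , next_future(std::async(std::launch::async, produce))
    {
    }

    std::optional<T> take()
    {
        auto current = next_future.get();
        if (current)  /// Schedule more work only while the stream has items.
            next_future = std::async(std::launch::async, produce);
        return current;
    }

private:
    std::function<std::optional<T>()> produce;
    std::future<std::optional<T>> next_future;
};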
- create_reader_pool.wait();
- reader_future = createReaderAsync();
- }
-
- return {};
-}
-
-void StorageS3Source::addNumRowsToCache(const String & key, size_t num_rows)
-{
- String source = fs::path(url_host_and_port) / bucket / key;
- auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext());
- StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows);
-}
-
-std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo & key_with_info)
-{
- String source = fs::path(url_host_and_port) / bucket / key_with_info.key;
- auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext());
- auto get_last_mod_time = [&]() -> std::optional
- {
- return key_with_info.info->last_modification_time;
- };
-
- return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time);
-}
-
-class StorageS3Sink : public SinkToStorage
-{
-public:
- StorageS3Sink(
- const String & format,
- const Block & sample_block_,
- ContextPtr context,
- std::optional format_settings_,
- const CompressionMethod compression_method,
- const StorageS3::Configuration & configuration_,
- const String & bucket,
- const String & key)
- : SinkToStorage(sample_block_)
- , sample_block(sample_block_)
- , format_settings(format_settings_)
- {
- BlobStorageLogWriterPtr blob_log = nullptr;
- if (auto blob_storage_log = context->getBlobStorageLog())
- {
- blob_log = std::make_shared(std::move(blob_storage_log));
- blob_log->query_id = context->getCurrentQueryId();
- }
-
- const auto & settings = context->getSettingsRef();
- write_buf = wrapWriteBufferWithCompressionMethod(
- std::make_unique(
- configuration_.client,
- bucket,
- key,
- DBMS_DEFAULT_BUFFER_SIZE,
- configuration_.request_settings,
- std::move(blob_log),
- std::nullopt,
- threadPoolCallbackRunner(getIOThreadPool().get(), "S3ParallelWrite"),
- context->getWriteSettings()),
- compression_method,
- static_cast(settings.output_format_compression_level),
- static_cast(settings.output_format_compression_zstd_window_log));
- writer
- = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings);
- }
-
- String getName() const override { return "StorageS3Sink"; }
-
- void consume(Chunk chunk) override
- {
- std::lock_guard lock(cancel_mutex);
- if (cancelled)
- return;
- writer->write(getHeader().cloneWithColumns(chunk.detachColumns()));
- }
-
- void onCancel() override
- {
- std::lock_guard lock(cancel_mutex);
- finalize();
- cancelled = true;
- }
-
- void onException(std::exception_ptr exception) override
- {
- std::lock_guard lock(cancel_mutex);
- try
- {
- std::rethrow_exception(exception);
- }
- catch (...)
- {
- /// An exception context is needed to properly delete write buffers without finalization
- release();
- }
- }
-
- void onFinish() override
- {
- std::lock_guard lock(cancel_mutex);
- finalize();
- }
-
-private:
- void finalize()
- {
- if (!writer)
- return;
-
- try
- {
- writer->finalize();
- writer->flush();
- write_buf->finalize();
- }
- catch (...)
- {
- /// Stop ParallelFormattingOutputFormat correctly.
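The finalize()/release() pair here is a small exception-safety protocol: flush in a fixed order on success, and on failure drop the objects unflushed so their destructors cannot throw again. A condensed sketch with Writer/Buffer as stand-ins for the output format and write buffer:

#include <memory>

struct Writer { void finalize() {} };
struct Buffer { void finalize() {} };

class SinkSketch
{
public:
    void onFinish() { finalize(); }
    void onException() { release(); }  /// No flush on the error path.

private:
    void finalize()
    {
        if (!writer)
            return;
        try
        {
            writer->finalize();  /// Format writer first,
            buffer->finalize();  /// then the underlying buffer.
        }
        catch (...)
        {
            release();  /// Make later destruction a no-op, then rethrow.
            throw;
        }
    }

    void release()
    {
        writer.reset();
        buffer.reset();
    }

    std::unique_ptr<Writer> writer = std::make_unique<Writer>();
    std::unique_ptr<Buffer> buffer = std::make_unique<Buffer>();
};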
- release();
- throw;
- }
- }
-
- void release()
- {
- writer.reset();
- write_buf.reset();
- }
-
- Block sample_block;
- std::optional format_settings;
- std::unique_ptr write_buf;
- OutputFormatPtr writer;
- bool cancelled = false;
- std::mutex cancel_mutex;
-};
-
-
-class PartitionedStorageS3Sink : public PartitionedSink
-{
-public:
- PartitionedStorageS3Sink(
- const ASTPtr & partition_by,
- const String & format_,
- const Block & sample_block_,
- ContextPtr context_,
- std::optional format_settings_,
- const CompressionMethod compression_method_,
- const StorageS3::Configuration & configuration_,
- const String & bucket_,
- const String & key_)
- : PartitionedSink(partition_by, context_, sample_block_)
- , format(format_)
- , sample_block(sample_block_)
- , context(context_)
- , compression_method(compression_method_)
- , configuration(configuration_)
- , bucket(bucket_)
- , key(key_)
- , format_settings(format_settings_)
- {
- }
-
- SinkPtr createSinkForPartition(const String & partition_id) override
- {
- auto partition_bucket = replaceWildcards(bucket, partition_id);
- validateBucket(partition_bucket);
-
- auto partition_key = replaceWildcards(key, partition_id);
- validateKey(partition_key);
-
- return std::make_shared(
- format,
- sample_block,
- context,
- format_settings,
- compression_method,
- configuration,
- partition_bucket,
- partition_key
- );
- }
-
-private:
- const String format;
- const Block sample_block;
- const ContextPtr context;
- const CompressionMethod compression_method;
- const StorageS3::Configuration configuration;
- const String bucket;
- const String key;
- const std::optional format_settings;
-
- static void validateBucket(const String & str)
- {
- S3::URI::validateBucket(str, {});
-
- if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size()))
- throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name");
-
- validatePartitionKey(str, false);
- }
-
- static void validateKey(const String & str)
- {
- /// See:
- /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
- /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject
-
- if (str.empty() || str.size() > 1024)
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1024 characters), got: {}", str.size());
-
- if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size()))
- throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key");
-
- validatePartitionKey(str, true);
- }
-};
-
-
-StorageS3::StorageS3(
- const Configuration & configuration_,
- ContextPtr context_,
- const StorageID & table_id_,
- const ColumnsDescription & columns_,
- const ConstraintsDescription & constraints_,
- const String & comment,
- std::optional format_settings_,
- bool distributed_processing_,
- ASTPtr partition_by_)
- : IStorage(table_id_)
- , configuration(configuration_)
- , name(configuration.url.storage_name)
- , distributed_processing(distributed_processing_)
- , format_settings(format_settings_)
- , partition_by(partition_by_)
-{
- updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
-
- FormatFactory::instance().checkFormatName(configuration.format);
- context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri);
- context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast);
-
- StorageInMemoryMetadata storage_metadata;
- if (columns_.empty())
- {
- auto columns =
getTableStructureFromDataImpl(configuration, format_settings, context_); - storage_metadata.setColumns(columns); - } - else - { - /// We don't allow special columns in S3 storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -static std::shared_ptr createFileIterator( - const StorageS3::Configuration & configuration, - bool distributed_processing, - ContextPtr local_context, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - StorageS3::KeysWithInfo * read_keys = nullptr, - std::function file_progress_callback = {}) -{ - if (distributed_processing) - { - return std::make_shared(local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - return std::make_shared( - *configuration.client, configuration.url, predicate, virtual_columns, - local_context, read_keys, configuration.request_settings, file_progress_callback); - } - else - { - Strings keys = configuration.keys; - auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - if (filter_dag) - { - std::vector paths; - paths.reserve(keys.size()); - for (const auto & key : keys) - paths.push_back(fs::path(configuration.url.bucket) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); - } - - return std::make_shared( - *configuration.client, configuration.url.version_id, keys, - configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback); - } -} - -bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageS3::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -void StorageS3::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), virtual_columns); - - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - read_from_format_info.source_header, - column_names, - storage_snapshot, - *this, - std::move(read_from_format_info), - need_only_count, - local_context, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromStorageS3Step::applyFilters() -{ - auto filter_actions_dag = 
ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - iterator_wrapper = createFileIterator( - query_configuration, storage.distributed_processing, local_context, predicate, - virtual_columns, nullptr, local_context->getFileProgressCallback()); -} - -void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - if (storage.partition_by && query_configuration.withWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); - - createIterator(nullptr); - - size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else - /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case. - num_streams = 1; - - const size_t max_threads = local_context->getSettingsRef().max_threads; - const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); - LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); - - Pipes pipes; - pipes.reserve(num_streams); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - read_from_format_info, - query_configuration.format, - storage.getName(), - local_context, - storage.format_settings, - max_block_size, - query_configuration.request_settings, - query_configuration.compression_method, - query_configuration.client, - query_configuration.url.bucket, - query_configuration.url.version_id, - query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), - iterator_wrapper, - max_parsing_threads, - need_only_count); - - source->setKeyCondition(filter_nodes.nodes, local_context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(read_from_format_info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(query_configuration.keys.back(), query_configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && query_configuration.withWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - query_configuration.keys.back()); - } - else - { - if (query_configuration.withGlobs()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); - - bool truncate_in_insert = local_context->getSettingsRef().s3_truncate_on_insert; - - if (!truncate_in_insert && S3::objectExists(*query_configuration.client, query_configuration.url.bucket, query_configuration.keys.back(), query_configuration.url.version_id, query_configuration.request_settings)) - { - if (local_context->getSettingsRef().s3_create_new_file_on_insert) - { - size_t index = query_configuration.keys.size(); - const auto & first_key = query_configuration.keys[0]; - auto pos = first_key.find_first_of('.'); - String new_key; - do - { - new_key = first_key.substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? "" : first_key.substr(pos)); - ++index; - } - while (S3::objectExists(*query_configuration.client, query_configuration.url.bucket, new_key, query_configuration.url.version_id, query_configuration.request_settings)); - - query_configuration.keys.push_back(new_key); - configuration.keys.push_back(new_key); - } - else - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - query_configuration.url.bucket, query_configuration.keys.back()); - } - } - - return std::make_shared( - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - query_configuration.keys.back()); - } -} - -void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - if (query_configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", - query_configuration.url.key); - } - - Aws::S3::Model::Delete delkeys; - - for (const auto & key : query_configuration.keys) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(key); - delkeys.AddObjects(std::move(obj)); - } - - ProfileEvents::increment(ProfileEvents::S3DeleteObjects); - S3::DeleteObjectsRequest request; - request.SetBucket(query_configuration.url.bucket); - request.SetDelete(delkeys); - - auto response = query_configuration.client->DeleteObjects(request); - - const auto * response_error = response.IsSuccess() ? 
nullptr : &response.GetError(); - auto time_now = std::chrono::system_clock::now(); - if (auto blob_storage_log = BlobStorageLogWriter::create()) - { - for (const auto & key : query_configuration.keys) - blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); - } - - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw S3Exception(err.GetMessage(), err.GetErrorType()); - } - - for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); -} - -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(ContextPtr local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); - return configuration; -} - -void StorageS3::updateConfiguration(ContextPtr local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); -} - -void StorageS3::useConfiguration(const Configuration & new_configuration) -{ - std::lock_guard lock(configuration_update_mutex); - configuration = new_configuration; -} - -const StorageS3::Configuration & StorageS3::getConfiguration() -{ - std::lock_guard lock(configuration_update_mutex); - return configuration; -} - -bool StorageS3::Configuration::update(ContextPtr context) -{ - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); - - if (client && (static_configuration || !auth_settings.hasUpdates(s3_settings.auth_settings))) - return false; - - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = url.key; - connect(context); - return true; -} - -void StorageS3::Configuration::connect(ContextPtr context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.http_connection_pool_size = global_settings.s3_http_connection_pool_size; - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - }; - - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); - client = S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - 
credentials.GetAWSSecretKey(),
- auth_settings.server_side_encryption_customer_key_base64,
- auth_settings.server_side_encryption_kms_config,
- std::move(headers),
- S3::CredentialsConfiguration{
- auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)),
- auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)),
- auth_settings.expiration_window_seconds.value_or(
- context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
- auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)),
- });
-}
-
-void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection)
-{
- validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys);
-
- auto filename = collection.getOrDefault("filename", "");
- if (!filename.empty())
- configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename);
- else
- configuration.url = S3::URI(collection.get("url"));
-
- configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", "");
- configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", "");
- configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1);
- configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false);
- configuration.auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS);
-
- configuration.format = collection.getOrDefault("format", configuration.format);
- configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto"));
- configuration.structure = collection.getOrDefault("structure", "auto");
-
- configuration.request_settings = S3Settings::RequestSettings(collection);
-}
-
-StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context, bool get_format_from_file)
-{
- StorageS3::Configuration configuration;
-
- if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context))
- {
- processNamedCollectionResult(configuration, *named_collection);
- }
- else
- {
- /// Supported signatures:
- ///
- /// S3('url')
- /// S3('url', 'format')
- /// S3('url', 'format', 'compression')
- /// S3('url', NOSIGN)
- /// S3('url', NOSIGN, 'format')
- /// S3('url', NOSIGN, 'format', 'compression')
- /// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
- /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token')
- /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
- /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format')
- /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
- /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression')
- /// with optional headers() function
-
- size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context);
-
- if (count == 0 || count > 6)
- throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
- "Storage S3 requires 1 to 6 arguments: "
- "url, [NOSIGN | access_key_id, secret_access_key, [session_token]], name of used
format and [compression_method]");
-
- std::unordered_map engine_args_to_idx;
- bool no_sign_request = false;
-
- /// For 2 arguments we support 2 possible variants:
- /// - s3(source, format)
- /// - s3(source, NOSIGN)
- /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not.
- if (count == 2)
- {
- auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN");
- if (boost::iequals(second_arg, "NOSIGN"))
- no_sign_request = true;
- else
- engine_args_to_idx = {{"format", 1}};
- }
- /// For 3 arguments we support 3 possible variants:
- /// - s3(source, format, compression_method)
- /// - s3(source, access_key_id, secret_access_key)
- /// - s3(source, NOSIGN, format)
- /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, a format name, or an access key id.
- else if (count == 3)
- {
- auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN");
- if (boost::iequals(second_arg, "NOSIGN"))
- {
- no_sign_request = true;
- engine_args_to_idx = {{"format", 2}};
- }
- else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg))
- engine_args_to_idx = {{"format", 1}, {"compression_method", 2}};
- else
- engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}};
- }
- /// For 4 arguments we support 3 possible variants:
- /// - s3(source, access_key_id, secret_access_key, session_token)
- /// - s3(source, access_key_id, secret_access_key, format)
- /// - s3(source, NOSIGN, format, compression_method)
- /// We can distinguish them by looking at the 2-nd argument (NOSIGN or not) and, if not NOSIGN, at the 4-th (format name or session token).
- else if (count == 4)
- {
- auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN");
- if (boost::iequals(second_arg, "NOSIGN"))
- {
- no_sign_request = true;
- engine_args_to_idx = {{"format", 2}, {"compression_method", 3}};
- }
- else
- {
- auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format");
- if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg))
- {
- engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}};
- }
- else
- {
- engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}};
- }
- }
- }
- /// For 5 arguments we support 2 possible variants:
- /// - s3(source, access_key_id, secret_access_key, session_token, format)
- /// - s3(source, access_key_id, secret_access_key, format, compression)
- else if (count == 5)
- {
- auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format");
- if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg))
- {
- engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}};
- }
- else
- {
- engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}};
- }
- }
- else if (count == 6)
- {
- engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}};
- }
-
- /// This argument is always the first
- configuration.url = S3::URI(checkAndGetLiteralArgument(engine_args[0], "url"));
-
- if (engine_args_to_idx.contains("format"))
- configuration.format = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["format"]], "format");
-
- if (engine_args_to_idx.contains("compression_method"))
- configuration.compression_method =
checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); - - if (engine_args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); - - if (engine_args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); - - if (engine_args_to_idx.contains("session_token")) - configuration.auth_settings.session_token = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); - - if (no_sign_request) - configuration.auth_settings.no_sign_request = no_sign_request; - } - - configuration.static_configuration = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); - - configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.key, true); - - return configuration; -} - -ColumnsDescription StorageS3::getTableStructureFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx) -{ - return getTableStructureFromDataImpl(configuration, format_settings, ctx); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - std::shared_ptr file_iterator_, - const StorageS3Source::KeysWithInfo & read_keys_, - const StorageS3::Configuration & configuration_, - const std::optional & format_settings_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , read_keys(read_keys_) - , configuration(configuration_) - , format_settings(format_settings_) - , prev_read_keys_size(read_keys_.size()) - { - } - - std::pair, std::optional> next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; - } - - while (true) - { - current_key_with_info = (*file_iterator)(); - - if (!current_key_with_info || current_key_with_info->key.empty()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in S3 or all files are empty. You must specify table structure manually", - configuration.format); - - return {nullptr, std::nullopt}; - } - - /// S3 file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; - } - - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) - continue; - - /// In union mode, check cached columns only for current key. 
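The cache lookups in this iterator all follow one validation rule: a cached schema counts only if the object's last modification time is known and not newer than the cache entry. A minimal sketch of that rule (SchemaCacheSketch and its string payload are illustrative, not the real SchemaCache API):

#include <ctime>
#include <functional>
#include <map>
#include <optional>
#include <string>

struct CachedSchema
{
    std::string columns;   /// Stand-in for ColumnsDescription.
    time_t cached_at = 0;  /// When the schema was inferred and cached.
};

class SchemaCacheSketch
{
public:
    void add(const std::string & key, std::string columns, time_t now)
    {
        cache[key] = {std::move(columns), now};
    }

    std::optional<std::string> tryGet(
        const std::string & key,
        const std::function<std::optional<time_t>()> & get_last_mod_time)
    {
        auto it = cache.find(key);
        if (it == cache.end())
            return std::nullopt;
        auto last_mod = get_last_mod_time();
        if (!last_mod || *last_mod > it->second.cached_at)
            return std::nullopt;  /// Unknown or newer object: entry is stale.
        return it->second.columns;
    }

private:
    std::map<std::string, CachedSchema> cache;
};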
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - StorageS3::KeysWithInfo keys = {current_key_with_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) - { - first = false; - return {nullptr, columns_from_cache}; - } - } - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) - { - first = false; - return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; }); - auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - String getLastFileName() const override - { - if (current_key_with_info) - return current_key_with_info->key; - return ""; - } - - private: - std::optional tryGetColumnsFromCache( - const StorageS3::KeysWithInfo::const_iterator & begin, - const StorageS3::KeysWithInfo::const_iterator & end) - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return std::nullopt; - - auto & schema_cache = StorageS3::getSchemaCache(getContext()); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] - { - time_t last_modification_time = 0; - if ((*it)->info) - { - last_modification_time = (*it)->info->last_modification_time; - } - else - { - /// Note that in case of exception in getObjectInfo returned info will be empty, - /// but schema cache will 
handle this case and won't return columns from cache - /// because we can't say that it's valid without last modification time. - last_modification_time = S3::getObjectInfo( - *configuration.client, - configuration.url.bucket, - (*it)->key, - configuration.url.version_id, - configuration.request_settings, - /*with_metadata=*/ false, - /*for_disk_s3=*/ false, - /*throw_on_error= */ false).last_modification_time; - } - - return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt; - }; - - String path = fs::path(configuration.url.bucket) / (*it)->key; - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - const StorageS3Source::KeysWithInfo & read_keys; - const StorageS3::Configuration & configuration; - const std::optional & format_settings; - StorageS3Source::KeyWithInfoPtr current_key_with_info; - size_t prev_read_keys_size; - bool first = true; - }; - -} - -ColumnsDescription StorageS3::getTableStructureFromDataImpl( - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx) -{ - KeysWithInfo read_keys; - - auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx); - return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); -} - -void registerStorageS3Impl(const String & name, StorageFactory & factory) -{ - factory.registerStorage(name, [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. 
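The two merge steps differ in strictness: session changes are filtered down to known format settings and unknown names are dropped silently, while changes from the SETTINGS clause are validated. The same policy as a self-contained sketch (mergeFormatSettings and the string-map representation are assumptions for illustration):

#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

using SettingChange = std::pair<std::string, std::string>;

/// Layered settings merge: lenient for session-level changes, strict for
/// changes written explicitly in the CREATE query.
std::map<std::string, std::string> mergeFormatSettings(
    std::map<std::string, std::string> known_defaults,
    const std::vector<SettingChange> & session_changes,
    const std::vector<SettingChange> & create_query_changes)
{
    for (const auto & [name, value] : session_changes)
        if (known_defaults.count(name))   /// Unknown names: silently ignored.
            known_defaults[name] = value;

    for (const auto & [name, value] : create_query_changes)
    {
        if (!known_defaults.count(name))  /// SETTINGS clause: validated.
            throw std::invalid_argument("Unknown format setting: " + name);
        known_defaults[name] = value;
    }
    return known_defaults;
}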
- user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - std::move(configuration), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing_ */false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} - -void registerStorageS3(StorageFactory & factory) -{ - return registerStorageS3Impl("S3", factory); -} - -void registerStorageCOS(StorageFactory & factory) -{ - return registerStorageS3Impl("COSN", factory); -} - -void registerStorageOSS(StorageFactory & factory) -{ - return registerStorageS3Impl("OSS", factory); -} - -NamesAndTypesList StorageS3::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageS3::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - -bool StorageS3::supportsPartitionBy() const -{ - return true; -} - -SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h deleted file mode 100644 index 81a03cc5ad5..00000000000 --- a/src/Storages/StorageS3.h +++ /dev/null @@ -1,399 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace Aws::S3 -{ - class Client; -} - -namespace DB -{ - -class PullingPipelineExecutor; -class NamedCollection; - -class StorageS3Source : public SourceWithKeyCondition, WithContext -{ -public: - - struct KeyWithInfo - { - KeyWithInfo() = default; - - explicit KeyWithInfo(String key_, std::optional info_ = std::nullopt) - : key(std::move(key_)), info(std::move(info_)) {} - - virtual ~KeyWithInfo() = default; - - String key; - std::optional info; - }; - using KeyWithInfoPtr = std::shared_ptr; - - using KeysWithInfo = std::vector; - - class IIterator - { - public: - virtual ~IIterator() = default; - virtual KeyWithInfoPtr next(size_t idx = 0) = 0; /// NOLINT - - /// Estimates how many streams we need to process all files. - /// If keys count >= max_threads_count, the returned number may not represent the actual number of the keys. - /// Intended to be called before any next() calls, may underestimate otherwise - /// fixme: May underestimate if the glob has a strong filter, so there are few matches among the first 1000 ListObjects results. 
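On the consumer side, the estimate declared just below is only used to cap parallelism, as in this sketch of the clamping logic (chooseNumStreams is an illustrative name):

#include <algorithm>
#include <cstddef>

/// Clamp the number of parallel streams to the number of keys, and fall
/// back to one stream when the estimate is unreliable (e.g. a glob that has
/// matched little so far).
size_t chooseNumStreams(size_t requested_streams, size_t estimated_keys)
{
    if (estimated_keys <= 1)
        return 1;  /// The estimate may be too low; one stream can still iterate everything.
    return std::min(requested_streams, estimated_keys);
}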
- virtual size_t estimatedKeysCount() = 0; - - KeyWithInfoPtr operator ()() { return next(); } - }; - - class DisclosedGlobIterator : public IIterator - { - public: - DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - ContextPtr context, - KeysWithInfo * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}, - std::function progress_callback_ = {}); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class KeysIterator : public IIterator - { - public: - explicit KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys = nullptr, - std::function progress_callback_ = {}); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class ReadTaskIterator : public IIterator - { - public: - explicit ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - KeysWithInfo buffer; - std::atomic_size_t index = 0; - - ReadTaskCallback callback; - }; - - StorageS3Source( - const ReadFromFormatInfo & info, - const String & format, - String name_, - ContextPtr context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket, - const String & version_id, - const String & url_host_and_port, - std::shared_ptr file_iterator_, - size_t max_parsing_threads, - bool need_only_count_); - - ~StorageS3Source() override; - - String getName() const override; - - void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override - { - setKeyConditionImpl(nodes, context_, sample_block); - } - - Chunk generate() override; - -private: - friend class StorageS3QueueSource; - - String name; - String bucket; - String version_id; - String url_host_and_port; - String format; - ColumnsDescription columns_desc; - NamesAndTypesList requested_columns; - UInt64 max_block_size; - S3Settings::RequestSettings request_settings; - String compression_hint; - std::shared_ptr client; - Block sample_block; - std::optional format_settings; - - struct ReaderHolder - { - public: - ReaderHolder( - KeyWithInfoPtr key_with_info_, - String bucket_, - std::unique_ptr read_buf_, - std::shared_ptr source_, - std::unique_ptr pipeline_, - std::unique_ptr reader_) - : key_with_info(key_with_info_) - , bucket(std::move(bucket_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } - - ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; - - ReaderHolder(ReaderHolder && other) noexcept - { - *this = std::move(other); - } - - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. 
- /// reader uses pipeline, pipeline uses read_buf. - reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - key_with_info = std::move(other.key_with_info); - bucket = std::move(other.bucket); - return *this; - } - - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } - String getPath() const { return fs::path(bucket) / key_with_info->key; } - const String & getFile() const { return key_with_info->key; } - const KeyWithInfo & getKeyWithInfo() const { return *key_with_info; } - std::optional getFileSize() const { return key_with_info->info ? std::optional(key_with_info->info->size) : std::nullopt; } - - const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } - - private: - KeyWithInfoPtr key_with_info; - String bucket; - std::unique_ptr read_buf; - std::shared_ptr source; - std::unique_ptr pipeline; - std::unique_ptr reader; - }; - - ReaderHolder reader; - - NamesAndTypesList requested_virtual_columns; - std::shared_ptr file_iterator; - size_t max_parsing_threads = 1; - bool need_only_count; - - LoggerPtr log = getLogger("StorageS3Source"); - - ThreadPool create_reader_pool; - ThreadPoolCallbackRunner create_reader_scheduler; - std::future reader_future; - std::atomic initialized{false}; - - size_t total_rows_in_file = 0; - - /// Notice: we should initialize reader and future_reader lazily in generate to make sure key_condition - /// is set before createReader is invoked for key_condition is read in createReader. - void lazyInitialize(size_t idx = 0); - - /// Recreate ReadBuffer and Pipeline for each file. - ReaderHolder createReader(size_t idx = 0); - std::future createReaderAsync(size_t idx = 0); - - std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); - std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); - - void addNumRowsToCache(const String & key, size_t num_rows); - std::optional tryGetNumRowsFromCache(const KeyWithInfo & key_with_info); -}; - -/** - * This class represents table engine for external S3 urls. - * It sends HTTP GET to server when select is called and - * HTTP PUT when insert is called. - */ -class StorageS3 : public IStorage -{ -public: - struct Configuration : public StatelessTableEngineConfiguration - { - Configuration() = default; - - String getPath() const { return url.key; } - - bool update(ContextPtr context); - - void connect(ContextPtr context); - - bool withGlobs() const { return url.key.find_first_of("*?{") != std::string::npos; } - - bool withWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return url.bucket.find(PARTITION_ID_WILDCARD) != String::npos - || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; - } - - S3::URI url; - S3::AuthSettings auth_settings; - S3Settings::RequestSettings request_settings; - /// If s3 configuration was passed from ast, then it is static. - /// If from config - it can be changed with config reload. - bool static_configuration = true; - /// Headers from ast is a part of static configuration. 
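withWildcard() above gates the partitioned write path; per partition, every occurrence of {_partition_id} in the bucket and key templates is substituted before a sink is created. A sketch of that substitution (replacePartitionWildcard stands in for the real replaceWildcards helper):

#include <string>

std::string replacePartitionWildcard(std::string templ, const std::string & partition_id)
{
    static const std::string wildcard = "{_partition_id}";
    size_t pos = 0;
    while ((pos = templ.find(wildcard, pos)) != std::string::npos)
    {
        templ.replace(pos, wildcard.size(), partition_id);
        pos += partition_id.size();  /// Skip past the insertion to avoid rescanning it.
    }
    return templ;
}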
- HTTPHeaderEntries headers_from_ast; - - std::shared_ptr client; - std::vector keys; - }; - - StorageS3( - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_ = false, - ASTPtr partition_by_ = nullptr); - - String getName() const override - { - return name; - } - - void read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; - - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - - bool supportsPartitionBy() const override; - - static void processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection); - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - static StorageS3::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context, bool get_format_from_file = true); - - static ColumnsDescription getTableStructureFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx); - - using KeysWithInfo = StorageS3Source::KeysWithInfo; - - bool supportsTrivialCountOptimization() const override { return true; } - -protected: - virtual Configuration updateConfigurationAndGetCopy(ContextPtr local_context); - - virtual void updateConfiguration(ContextPtr local_context); - - void useConfiguration(const Configuration & new_configuration); - - const Configuration & getConfiguration(); - -private: - friend class StorageS3Cluster; - friend class TableFunctionS3Cluster; - friend class StorageS3Queue; - friend class ReadFromStorageS3Step; - - Configuration configuration; - std::mutex configuration_update_mutex; - NamesAndTypesList virtual_columns; - - String name; - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; - - static ColumnsDescription getTableStructureFromDataImpl( - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx); - - bool supportsSubcolumns() const override { return true; } - - bool supportsSubsetOfColumns(const ContextPtr & context) const; - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; -}; - -} - -#endif diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp deleted file mode 100644 index 25c2b42b766..00000000000 --- a/src/Storages/StorageS3Cluster.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "Storages/StorageS3Cluster.h" - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageS3Cluster::StorageS3Cluster( - const String & cluster_name_, - const StorageS3::Configuration 
& configuration_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) - , s3_configuration{configuration_} -{ - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - updateConfigurationIfChanged(context_); - - if (columns_.empty()) - { - /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function - auto columns = StorageS3::getTableStructureFromDataImpl(s3_configuration, /*format_settings=*/std::nullopt, context_); - storage_metadata.setColumns(columns); - } - else - storage_metadata.setColumns(columns_); - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -void StorageS3Cluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - - TableFunctionS3Cluster::addColumnsStructureToArguments(expression_list->children, structure, context); -} - -void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) -{ - s3_configuration.update(local_context); -} - -RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, predicate, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); - - auto callback = std::make_shared>([iterator]() mutable -> String - { - if (auto next = iterator->next()) - return next->key; - return ""; - }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - -NamesAndTypesList StorageS3Cluster::getVirtuals() const -{ - return virtual_columns; -} - - -} - -#endif diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h deleted file mode 100644 index c526f14834a..00000000000 --- a/src/Storages/StorageS3Cluster.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include - -#include "Client/Connection.h" -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageS3Cluster : public IStorageCluster -{ -public: - StorageS3Cluster( - const String & cluster_name_, - const StorageS3::Configuration & configuration_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); - - std::string getName() const override { return "S3Cluster"; } - - NamesAndTypesList getVirtuals() const override; - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool 
supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization() const override { return true; } - -protected: - void updateConfigurationIfChanged(ContextPtr local_context); - -private: - void updateBeforeRead(const ContextPtr & context) override { updateConfigurationIfChanged(context); } - - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; - - StorageS3::Configuration s3_configuration; - NamesAndTypesList virtual_columns; -}; - - -} - -#endif diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 1426ea83800..77d5be3698c 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -1,9 +1,7 @@ #include #include -#include #include -#include -#include +#include #include #include #include @@ -83,7 +81,7 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); + fillDataImpl(res_columns, StorageAzureBlobStorage::getSchemaCache(context), "Azure"); /// FIXME #endif } diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index dea9feaf28b..0b72d7e94fd 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -45,8 +45,6 @@ void registerStorageIceberg(StorageFactory & factory); #endif #if USE_HDFS -void registerStorageHDFS(StorageFactory & factory); - #if USE_HIVE void registerStorageHive(StorageFactory & factory); #endif @@ -99,9 +97,7 @@ void registerStorageSQLite(StorageFactory & factory); void registerStorageKeeperMap(StorageFactory & factory); -#if USE_AZURE_BLOB_STORAGE -void registerStorageAzureBlob(StorageFactory & factory); -#endif +void registerStorageObjectStorage(StorageFactory & factory); void registerStorages() { @@ -131,9 +127,7 @@ void registerStorages() #endif #if USE_AWS_S3 - registerStorageS3(factory); - registerStorageCOS(factory); - registerStorageOSS(factory); + // registerStorageS3(factory); registerStorageHudi(factory); registerStorageS3Queue(factory); @@ -148,12 +142,9 @@ void registerStorages() #endif #if USE_HDFS - registerStorageHDFS(factory); - #if USE_HIVE registerStorageHive(factory); #endif - #endif registerStorageODBC(factory); @@ -201,9 +192,7 @@ void registerStorages() registerStorageKeeperMap(factory); - #if USE_AZURE_BLOB_STORAGE - registerStorageAzureBlob(factory); - #endif + registerStorageObjectStorage(factory); } } diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index 7e81d6d21b7..0559472325b 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -1,14 +1,10 @@ #pragma once -#include "config.h" - #include #include -#include #include #include -#include -#include +#include namespace DB diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 961e5683fe2..884e1f5c4a2 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -10,6 +10,9 @@ # include # include # include +#include +#include +#include namespace DB { @@ -30,12 +33,13 @@ protected: bool /*is_insert_query*/) const override { ColumnsDescription columns; - if 
(TableFunction::configuration.structure != "auto") - columns = parseColumnsListFromString(TableFunction::configuration.structure, context); + if (TableFunction::configuration->structure != "auto") + columns = parseColumnsListFromString(TableFunction::configuration->structure, context); - StoragePtr storage = Storage::create( - TableFunction::configuration, context, false, StorageID(TableFunction::getDatabaseName(), table_name), - columns, ConstraintsDescription{}, String{}, std::nullopt); + StorageObjectStorageConfigurationPtr configuration = TableFunction::configuration; + StoragePtr storage = StorageIceberg>::create( + configuration, context, "", StorageID(TableFunction::getDatabaseName(), table_name), + columns, ConstraintsDescription{}, String{}, std::nullopt, false); storage->startup(); return storage; @@ -45,19 +49,19 @@ protected: ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override { - if (TableFunction::configuration.structure == "auto") + if (TableFunction::configuration->structure == "auto") { context->checkAccess(TableFunction::getSourceAccessType()); - return Storage::getTableStructureFromData(TableFunction::configuration, std::nullopt, context); + return Storage::getTableStructureFromData(TableFunction::object_storage, TableFunction::configuration, std::nullopt, context); } - return parseColumnsListFromString(TableFunction::configuration.structure, context); + return parseColumnsListFromString(TableFunction::configuration->structure, context); } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override { /// Set default format to Parquet if it's not specified in arguments. - TableFunction::configuration.format = "Parquet"; + TableFunction::configuration->format = "Parquet"; TableFunction::parseArguments(ast_function, context); } }; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp deleted file mode 100644 index b098cac5144..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ /dev/null @@ -1,323 +0,0 @@ -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "registerTableFunctions.h" -#include -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; -} - -namespace -{ - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) -{ - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - } - else - { - if (engine_args.size() < 3 || engine_args.size() > 8) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - 
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; - - if (engine_args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - configuration.structure = fourth_arg; - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); - if (is_format_arg(sixth_arg)) - configuration.format = sixth_arg; - else - configuration.structure = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - else if (engine_args.size() == 8) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - configuration.structure = 
checkAndGetLiteralArgument(engine_args[7], "structure"); - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - } -} - -void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Clone ast function, because we can modify its arguments like removing headers. - auto ast_copy = ast_function->clone(); - - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - auto & args = args_func.at(0)->children; - - parseArgumentsImpl(args, context); -} - -void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) -{ - if (tryGetNamedCollectionWithOverrides(args, context)) - { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); - } - else - { - if (args.size() < 3 || args.size() > 8) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - auto structure_literal = std::make_shared(structure); - - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; - - - if (args.size() == 3) - { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - else if (args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) - { - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - else - { - args.back() = structure_literal; - } - } - else if (args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) - { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(std::make_shared("auto")); - } - args.push_back(structure_literal); - } - else if (args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) - { - /// Add compression=auto before structure argument. 
- args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - else - { - args.back() = structure_literal; - } - } - else if (args.size() == 7) - { - args.push_back(structure_literal); - } - else if (args.size() == 8) - { - args.back() = structure_literal; - } - } -} - -ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const -{ - if (configuration.structure == "auto") - { - context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false); - } - - return parseColumnsListFromString(configuration.structure, context); -} - -bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); -} - -std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageAzureBlob::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - -StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); - else if (!structure_hint.empty()) - columns = structure_hint; - - StoragePtr storage = std::make_shared( - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - String{}, - /// No format_settings for table function Azure - std::nullopt, - /* distributed_processing */ false, - nullptr); - - storage->startup(); - - return storage; -} - -void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage.)", - .examples{{"azureBlobStorage", "SELECT * FROM azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h deleted file mode 100644 index 1a221f60c55..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - - -namespace DB -{ - -class Context; - -/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob. 
- */ -class TableFunctionAzureBlobStorage : public ITableFunction -{ -public: - static constexpr auto name = "azureBlobStorage"; - - static constexpr auto signature = " - connection_string, container_name, blobpath\n" - " - connection_string, container_name, blobpath, structure \n" - " - connection_string, container_name, blobpath, format \n" - " - connection_string, container_name, blobpath, format, compression \n" - " - connection_string, container_name, blobpath, format, compression, structure \n" - " - storage_account_url, container_name, blobpath, account_name, account_key\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; - - static size_t getMaxNumberOfArguments() { return 8; } - - String getName() const override - { - return name; - } - - virtual String getSignature() const - { - return signature; - } - - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); - -protected: - - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "Azure"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - mutable StorageAzureBlob::Configuration configuration; - ColumnsDescription structure_hint; -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp deleted file mode 100644 index 1c3b302a186..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( - const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - StoragePtr storage; - ColumnsDescription columns; - bool structure_argument_was_provided = configuration.structure != "auto"; - - if (structure_argument_was_provided) - { - columns = parseColumnsListFromString(configuration.structure, context); - } - else if (!structure_hint.empty()) - { - columns = structure_hint; - } - - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - if 
(context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this filename won't contains globs - storage = std::make_shared( - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings - /* distributed_processing */ true, - /*partition_by_=*/nullptr); - } - else - { - storage = std::make_shared( - cluster_name, - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - context, - structure_argument_was_provided); - } - - storage->startup(); - - return storage; -} - - -void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", - .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, - .allow_readonly = false} - ); -} - - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h deleted file mode 100644 index 58f79328f63..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) - * A table function, which allows to process many files from Azure Blob Storage on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in Azure Blob Storage file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. 
- */ -class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "azureBlobStorageCluster"; - static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp index b8bf810f6fa..08b62ed2612 100644 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ b/src/TableFunctions/TableFunctionDeltaLake.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "registerTableFunctions.h" namespace DB @@ -16,17 +16,17 @@ struct TableFunctionDeltaLakeName static constexpr auto name = "deltaLake"; }; -using TableFunctionDeltaLake = ITableFunctionDataLake; - -void registerTableFunctionDeltaLake(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation = { - .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", - .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} +// using TableFunctionDeltaLake = ITableFunctionDataLake; +// +// void registerTableFunctionDeltaLake(TableFunctionFactory & factory) +// { +// factory.registerFunction( +// {.documentation = { +// .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", +// .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, +// .categories{"DataLake"}}, +// .allow_readonly = false}); +// } } diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp deleted file mode 100644 index 8d48a7ba30e..00000000000 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "config.h" -#include "registerTableFunctions.h" - -#if USE_HDFS -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -StoragePtr TableFunctionHDFS::getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const -{ - return std::make_shared( - source, - StorageID(getDatabaseName(), table_name), - format_, - columns, - ConstraintsDescription{}, - String{}, - global_context, - compression_method_); -} - -ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - if (structure == "auto") - { - context->checkAccess(getSourceAccessType()); - return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); - } - - return parseColumnsListFromString(structure, context); -} - -std::unordered_set TableFunctionHDFS::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageHDFS::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - 
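getActualTableStructure() above follows the contract shared by all of these table functions: an explicit structure argument is authoritative, and only the value auto triggers inference from the data itself, which is why that path alone performs an access check. A minimal self-contained sketch of the dispatch, with hypothetical parse/infer helpers standing in for parseColumnsListFromString() and getTableStructureFromData():

    #include <iostream>
    #include <string>
    #include <vector>

    struct Column { std::string name; std::string type; };

    /// Hypothetical stand-in for parseColumnsListFromString().
    std::vector<Column> parseStructure(const std::string & /*structure*/)
    {
        return {{"key", "UInt64"}, {"value", "String"}};
    }

    /// Hypothetical stand-in for getTableStructureFromData(): the real code
    /// reads a sample of the files and infers the column list from it.
    std::vector<Column> inferFromData()
    {
        return {{"c1", "Nullable(Int64)"}};
    }

    std::vector<Column> getActualTableStructure(const std::string & structure)
    {
        if (structure == "auto")
            return inferFromData();     /// only this branch touches the data source
        return parseStructure(structure);
    }

    int main()
    {
        for (const auto & c : getActualTableStructure("auto"))
            std::cout << c.name << ' ' << c.type << '\n';
    }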
-void registerTableFunctionHDFS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} -#endif diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h deleted file mode 100644 index 3a719496b26..00000000000 --- a/src/TableFunctions/TableFunctionHDFS.h +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include - - -namespace DB -{ - -class Context; - -/* hdfs(URI, [format, structure, compression]) - creates a temporary storage from hdfs files - * - */ -class TableFunctionHDFS : public ITableFunctionFileLike -{ -public: - static constexpr auto name = "hdfs"; - static constexpr auto signature = " - uri\n" - " - uri, format\n" - " - uri, format, structure\n" - " - uri, format, structure, compression_method\n"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - -private: - StoragePtr getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const override; - const char * getStorageTypeName() const override { return "HDFS"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp deleted file mode 100644 index 6fb7ed0fce5..00000000000 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include - -#include -#include -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionHDFSCluster::getStorage( - const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context, - const std::string & table_name, const String & /*compression_method_*/) const -{ - StoragePtr storage; - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this uri won't contains globs - storage = std::make_shared( - filename, - StorageID(getDatabaseName(), table_name), - format, - columns, - ConstraintsDescription{}, - String{}, - context, - compression_method, - /*distributed_processing=*/true, - nullptr); - } - else - { - storage = std::make_shared( - context, - cluster_name, - filename, - StorageID(getDatabaseName(), table_name), - format, - columns, - ConstraintsDescription{}, - compression_method, - structure != "auto"); - } - return storage; -} - -void registerTableFunctionHDFSCluster(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h deleted file mode 100644 index 0253217feb7..00000000000 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * hdfsCluster(cluster, URI, format, structure, compression_method) - * A table function, which allows to process many files from HDFS on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in HDFS file path and dispatch each file dynamically. 
- * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. - */ -class TableFunctionHDFSCluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "hdfsCluster"; - static constexpr auto signature = " - cluster_name, uri\n" - " - cluster_name, uri, format\n" - " - cluster_name, uri, format, structure\n" - " - cluster_name, uri, format, structure, compression_method\n"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const override; - - const char * getStorageTypeName() const override { return "HDFSCluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp index 436e708b72d..c6d84504c40 100644 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ b/src/TableFunctions/TableFunctionHudi.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "registerTableFunctions.h" namespace DB @@ -15,17 +15,17 @@ struct TableFunctionHudiName { static constexpr auto name = "hudi"; }; -using TableFunctionHudi = ITableFunctionDataLake; - -void registerTableFunctionHudi(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the Hudi table stored on object store.)", - .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} +// using TableFunctionHudi = ITableFunctionDataLake; +// +// void registerTableFunctionHudi(TableFunctionFactory & factory) +// { +// factory.registerFunction( +// {.documentation +// = {.description=R"(The table function can be used to read the Hudi table stored on object store.)", +// .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, +// .categories{"DataLake"}}, +// .allow_readonly = false}); +// } } #endif diff --git a/src/TableFunctions/TableFunctionIceberg.cpp b/src/TableFunctions/TableFunctionIceberg.cpp index d37aace01c6..1a28f9292d1 100644 --- a/src/TableFunctions/TableFunctionIceberg.cpp +++ b/src/TableFunctions/TableFunctionIceberg.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "registerTableFunctions.h" @@ -17,7 +17,10 @@ struct TableFunctionIcebergName static constexpr auto name = "iceberg"; }; -using TableFunctionIceberg = ITableFunctionDataLake; +using TableFunctionIceberg = ITableFunctionDataLake< + TableFunctionIcebergName, + StorageIceberg, + TableFunctionS3>; void registerTableFunctionIceberg(TableFunctionFactory & factory) { diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp new file mode 100644 index 00000000000..d009a9347f3 --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -0,0 +1,224 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registerTableFunctions.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +static void initializeConfiguration( + 
StorageObjectStorageConfiguration & configuration,
+    ASTs & engine_args,
+    ContextPtr local_context,
+    bool with_table_structure)
+{
+    if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context))
+        configuration.fromNamedCollection(*named_collection);
+    else
+        configuration.fromAST(engine_args, local_context, with_table_structure);
+}
+
+template <typename Definition, typename Configuration>
+ObjectStoragePtr TableFunctionObjectStorage<Definition, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const
+{
+    if (!object_storage)
+        object_storage = configuration->createOrUpdateObjectStorage(context, create_readonly);
+    return object_storage;
+}
+
+template <typename Definition, typename Configuration>
+std::vector<size_t> TableFunctionObjectStorage<Definition, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const
+{
+    auto & table_function_node = query_node_table_function->as<TableFunctionNode &>();
+    auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes();
+    size_t table_function_arguments_size = table_function_arguments_nodes.size();
+
+    std::vector<size_t> result;
+    for (size_t i = 0; i < table_function_arguments_size; ++i)
+    {
+        auto * function_node = table_function_arguments_nodes[i]->as<FunctionNode>();
+        if (function_node && function_node->getFunctionName() == "headers")
+            result.push_back(i);
+    }
+    return result;
+}
+
+template <typename Definition, typename Configuration>
+void TableFunctionObjectStorage<Definition, Configuration>::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context)
+{
+    Configuration::addStructureToArgs(args, structure, context);
+}
+
+template <typename Definition, typename Configuration>
+void TableFunctionObjectStorage<Definition, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context)
+{
+    configuration = std::make_shared<Configuration>();
+    initializeConfiguration(*configuration, engine_args, local_context, true);
+}
+
+template <typename Definition, typename Configuration>
+void TableFunctionObjectStorage<Definition, Configuration>::parseArguments(const ASTPtr & ast_function, ContextPtr context)
+{
+    /// Clone ast function, because we can modify its arguments like removing headers.
+ auto ast_copy = ast_function->clone(); + ASTs & args_func = ast_copy->children; + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); + + auto & args = args_func.at(0)->children; + parseArgumentsImpl(args, context); +} + +template +ColumnsDescription TableFunctionObjectStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const +{ + if (configuration->structure == "auto") + { + context->checkAccess(getSourceAccessType()); + auto storage = getObjectStorage(context, !is_insert_query); + return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context); + } + + return parseColumnsListFromString(configuration->structure, context); +} + +template +bool TableFunctionObjectStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); +} + +template +std::unordered_set TableFunctionObjectStorage::getVirtualsToCheckBeforeUsingStructureHint() const +{ + auto virtual_column_names = StorageObjectStorage::getVirtualColumnNames(); + return {virtual_column_names.begin(), virtual_column_names.end()}; +} + +template +StoragePtr TableFunctionObjectStorage::executeImpl( + const ASTPtr & /* ast_function */, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const +{ + ColumnsDescription columns; + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; + + StoragePtr storage = std::make_shared>( + configuration, + getObjectStorage(context, !is_insert_query), + Definition::storage_type_name, + context, + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + String{}, + /// No format_settings for table function Azure + std::nullopt, + /* distributed_processing */ false, + nullptr); + + storage->startup(); + return storage; +} + +void registerTableFunctionObjectStorage(TableFunctionFactory & factory) +{ +#if USE_AWS_S3 + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on AWS S3.)", + .examples{{"s3", "SELECT * FROM s3(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); + + factory.registerFunction>( + { + .allow_readonly = false + }); + + factory.registerFunction>( + { + .allow_readonly = false + }); + factory.registerFunction>( + { + .allow_readonly = false + }); +#endif + +#if USE_AZURE_BLOB_STORAGE + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on Azure Blob Storage.)", + .examples{ + { + "azureBlobStorage", + "SELECT * FROM azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])", "" + }} + }, + .allow_readonly = false + }); +#endif +#if USE_HDFS + factory.registerFunction>( + { + .allow_readonly = false + }); +#endif +} + +#if USE_AZURE_BLOB_STORAGE +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +#if USE_AWS_S3 +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class 
TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +#if USE_HDFS +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +} diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h new file mode 100644 index 00000000000..1df0ba2f843 --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -0,0 +1,150 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include + + +namespace DB +{ + +class Context; +class StorageS3Configuration; +class StorageAzureBlobConfiguration; +class StorageHDFSConfiguration; +struct S3StorageSettings; +struct AzureStorageSettings; +struct HDFSStorageSettings; + +struct AzureDefinition +{ + static constexpr auto name = "azureBlobStorage"; + static constexpr auto storage_type_name = "Azure"; + static constexpr auto signature = " - connection_string, container_name, blobpath\n" + " - connection_string, container_name, blobpath, structure \n" + " - connection_string, container_name, blobpath, format \n" + " - connection_string, container_name, blobpath, format, compression \n" + " - connection_string, container_name, blobpath, format, compression, structure \n" + " - storage_account_url, container_name, blobpath, account_name, account_key\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; +}; + +struct S3Definition +{ + static constexpr auto name = "s3"; + static constexpr auto storage_type_name = "S3"; + static constexpr auto signature = " - url\n" + " - url, format\n" + " - url, format, structure\n" + " - url, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key\n" + " - url, access_key_id, secret_access_key, session_token\n" + " - url, access_key_id, secret_access_key, format\n" + " - url, access_key_id, secret_access_key, session_token, format\n" + " - url, access_key_id, secret_access_key, format, structure\n" + " - url, access_key_id, secret_access_key, session_token, format, structure\n" + " - url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; +}; + +struct GCSDefinition +{ + static constexpr auto name = "gcs"; + static constexpr auto storage_type_name = "GCS"; + static constexpr auto signature = S3Definition::signature; +}; + +struct COSNDefinition +{ + static constexpr auto name = "cosn"; + static constexpr auto storage_type_name = "COSN"; + static constexpr auto signature = S3Definition::signature; +}; + +struct OSSDefinition +{ + static constexpr auto name = "oss"; + static constexpr auto storage_type_name = "OSS"; + static constexpr auto signature = S3Definition::signature; +}; + +struct HDFSDefinition +{ + static constexpr auto name = "hdfs"; + static constexpr auto storage_type_name = "HDFS"; + static constexpr auto signature = " - uri\n" + " - uri, format\n" + " - uri, format, structure\n" + " - uri, format, structure, 
compression_method\n"; +}; + +template +class TableFunctionObjectStorage : public ITableFunction +{ +public: + static constexpr auto name = Definition::name; + static constexpr auto signature = Definition::signature; + + static size_t getMaxNumberOfArguments() { return 8; } + + String getName() const override { return name; } + + virtual String getSignature() const { return signature; } + + bool hasStaticStructure() const override { return configuration->structure != "auto"; } + + bool needStructureHint() const override { return configuration->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; + + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; + + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); + + static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return Definition::storage_type_name; } + + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + ObjectStoragePtr getObjectStorage(const ContextPtr & context, bool create_readonly) const; + + mutable typename StorageObjectStorage::ConfigurationPtr configuration; + mutable ObjectStoragePtr object_storage; + ColumnsDescription structure_hint; + + std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; +}; + +#if USE_AWS_S3 +using TableFunctionS3 = TableFunctionObjectStorage; +#endif + +#if USE_AZURE_BLOB_STORAGE +using TableFunctionAzureBlob = TableFunctionObjectStorage; +#endif + +#if USE_HDFS +using TableFunctionHDFS = TableFunctionObjectStorage; +#endif +} + +#endif diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp new file mode 100644 index 00000000000..1d27a857cea --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -0,0 +1,113 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +template +StoragePtr TableFunctionObjectStorageCluster::executeImpl( + const ASTPtr & /*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const +{ + using Base = TableFunctionObjectStorage; + + StoragePtr storage; + ColumnsDescription columns; + bool structure_argument_was_provided = Base::configuration->structure != "auto"; + + if (structure_argument_was_provided) + { + columns = parseColumnsListFromString(Base::configuration->structure, context); + } + else if (!Base::structure_hint.empty()) + { + columns = Base::structure_hint; + } + + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On worker node this filename won't contains globs + storage = std::make_shared>( + Base::configuration, + Base::configuration->createOrUpdateObjectStorage(context, !is_insert_query), + Definition::storage_type_name, + context, + 
StorageID(Base::getDatabaseName(), table_name),
+            columns,
+            ConstraintsDescription{},
+            /* comment */String{},
+            /* format_settings */std::nullopt, /// No format_settings
+            /* distributed_processing */ true,
+            /*partition_by_=*/nullptr);
+    }
+    else
+    {
+        storage = std::make_shared>(
+            ITableFunctionCluster<Base>::cluster_name,
+            Base::configuration,
+            Base::configuration->createOrUpdateObjectStorage(context, !is_insert_query),
+            Definition::storage_type_name,
+            StorageID(Base::getDatabaseName(), table_name),
+            columns,
+            ConstraintsDescription{},
+            context,
+            structure_argument_was_provided);
+    }
+
+    storage->startup();
+    return storage;
+}
+
+
+void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory)
+{
+#if USE_AWS_S3
+    factory.registerFunction<TableFunctionS3Cluster>(
+    {
+        .documentation = {
+            .description=R"(The table function can be used to read the data stored on S3 in parallel for many nodes in a specified cluster.)",
+            .examples{{"s3Cluster", "SELECT * FROM s3Cluster(cluster, url, format, structure)", ""}}},
+        .allow_readonly = false
+    }
+    );
+#endif
+
+#if USE_AZURE_BLOB_STORAGE
+    factory.registerFunction<TableFunctionAzureBlobCluster>(
+    {
+        .documentation = {
+            .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)",
+            .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}},
+        .allow_readonly = false
+    }
+    );
+#endif
+
+#if USE_HDFS
+    factory.registerFunction<TableFunctionHDFSCluster>();
+#endif
+}
+
+#if USE_AWS_S3
+template class TableFunctionObjectStorageCluster<S3ClusterDefinition, StorageS3Configuration>;
+#endif
+
+#if USE_AZURE_BLOB_STORAGE
+template class TableFunctionObjectStorageCluster<AzureClusterDefinition, StorageAzureBlobConfiguration>;
+#endif
+
+#if USE_HDFS
+template class TableFunctionObjectStorageCluster<HDFSClusterDefinition, StorageHDFSConfiguration>;
+#endif
+}
diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h
new file mode 100644
index 00000000000..461456e37df
--- /dev/null
+++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h
@@ -0,0 +1,91 @@
+#pragma once
+#include "config.h"
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class Context;
+
+class StorageS3Settings;
+class StorageAzureBlobSettings;
+class StorageS3Configuration;
+class StorageAzureBlobConfiguration;
+
+struct AzureClusterDefinition
+{
+    /**
+     * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure)
+     * A table function, which allows to process many files from Azure Blob Storage on a specific cluster
+     * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks
+     * in Azure Blob Storage file path and dispatches each file dynamically.
+     * On worker node it asks initiator about next task to process, processes it.
+     * This is repeated until the tasks are finished.
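This hand-off is the contract every *Cluster function relies on: the initiator expands the glob once and serves keys one at a time through a callback, with an empty string meaning no more work (the same convention the task_iterator in getTaskIteratorExtension() implements above). A minimal sketch of such a task iterator, assuming nothing beyond that empty-string convention:

    #include <functional>
    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    /// Hypothetical stand-in for the initiator-side iterator over glob-matched keys.
    std::function<std::string()> makeTaskIterator(std::vector<std::string> keys)
    {
        auto state = std::make_shared<std::pair<std::vector<std::string>, size_t>>(std::move(keys), 0);
        return [state]() -> std::string
        {
            if (state->second >= state->first.size())
                return "";                          /// empty string == no more tasks
            return state->first[state->second++];
        };
    }

    int main()
    {
        auto next_task = makeTaskIterator({"data/part-0.parquet", "data/part-1.parquet"});
        /// Each worker repeatedly asks the initiator for its next file.
        for (auto task = next_task(); !task.empty(); task = next_task())
            std::cout << "processing " << task << '\n';
    }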
+ */ + static constexpr auto name = "azureBlobStorageCluster"; + static constexpr auto storage_type_name = "AzureBlobStorageCluster"; + static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; +}; + +struct S3ClusterDefinition +{ + static constexpr auto name = "s3Cluster"; + static constexpr auto storage_type_name = "S3Cluster"; + static constexpr auto signature = " - cluster, url\n" + " - cluster, url, format\n" + " - cluster, url, format, structure\n" + " - cluster, url, access_key_id, secret_access_key\n" + " - cluster, url, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, format\n" + " - cluster, url, access_key_id, secret_access_key, format, structure\n" + " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; +}; + +struct HDFSClusterDefinition +{ + static constexpr auto name = "hdfsCluster"; + static constexpr auto storage_type_name = "HDFSCluster"; + static constexpr auto signature = " - cluster_name, uri\n" + " - cluster_name, uri, format\n" + " - cluster_name, uri, format, structure\n" + " - cluster_name, uri, format, structure, compression_method\n"; +}; + +template +class TableFunctionObjectStorageCluster : public ITableFunctionCluster> +{ +public: + static constexpr auto name = Definition::name; + static constexpr auto signature = Definition::signature; + + String getName() const override { return name; } + String getSignature() const override { return signature; } + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return Definition::storage_type_name; } +}; + +#if USE_AWS_S3 +using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; +#endif + +#if USE_HDFS +using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster; +#endif +} diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp deleted file mode 100644 index a9c5a5c99f0..00000000000 --- a/src/TableFunctions/TableFunctionS3.cpp +++ /dev/null @@ -1,464 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "registerTableFunctions.h" -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - - -std::vector TableFunctionS3::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const -{ - auto & table_function_node = query_node_table_function->as(); - auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); - size_t table_function_arguments_size = table_function_arguments_nodes.size(); - - std::vector result; - - for (size_t i = 0; i < table_function_arguments_size; ++i) - { - auto * function_node = 
table_function_arguments_nodes[i]->as(); - if (function_node && function_node->getFunctionName() == "headers") - result.push_back(i); - } - - return result; -} - -/// This is needed to avoid copy-paste. Because s3Cluster arguments only differ in additional argument (first) - cluster name -void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context)) - { - StorageS3::processNamedCollectionResult(configuration, *named_collection); - if (configuration.format == "auto") - { - String file_path = named_collection->getOrDefault("filename", Poco::URI(named_collection->get("url")).getPath()); - configuration.format = FormatFactory::instance().getFormatFromFileName(file_path, true); - } - } - else - { - - size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers_from_ast, context); - - if (count == 0 || count > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); - - std::unordered_map args_to_idx; - - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 3 possible variants: - /// - s3(source, format, structure) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - args_to_idx = {{"format", 1}, {"structure", 2}}; - else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 4 possible variants: - /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, secret_access_key, format), - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. 
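
For illustration, the four 4-argument shapes listed above, as hedged SQL sketches (the bucket URL, credentials, session token and column list are placeholders):

    SELECT * FROM s3('https://bucket.s3.amazonaws.com/data.csv', 'CSV', 'a UInt64, b String', 'gzip');
    SELECT * FROM s3('https://bucket.s3.amazonaws.com/data.csv', '<access_key_id>', '<secret_access_key>', 'CSV');
    SELECT * FROM s3('https://bucket.s3.amazonaws.com/data.csv', '<access_key_id>', '<secret_access_key>', '<session_token>');
    SELECT * FROM s3('https://bucket.s3.amazonaws.com/data.csv', NOSIGN, 'CSV', 'a UInt64, b String');
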
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}, {"structure", 3}}; - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - { - args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - } - /// For 5 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, format, structure) - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, - /// and by the 4-th argument, check if it's a format name or not - else if (count == 5) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - } - } - // For 6 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) - /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) - /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not - else if (count == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; - } - } - else if (count == 7) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; - } - - /// This argument is always the first - String url = checkAndGetLiteralArgument(args[0], "url"); - configuration.url = S3::URI(url); - - if (args_to_idx.contains("format")) - { - auto format = checkAndGetLiteralArgument(args[args_to_idx["format"]], "format"); - /// Set format to configuration only of it's not 'auto', - /// because we can have default format set in configuration. 
- if (format != "auto") - configuration.format = format; - } - - if (args_to_idx.contains("structure")) - configuration.structure = checkAndGetLiteralArgument(args[args_to_idx["structure"]], "structure"); - - if (args_to_idx.contains("compression_method")) - configuration.compression_method = checkAndGetLiteralArgument(args[args_to_idx["compression_method"]], "compression_method"); - - if (args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(args[args_to_idx["access_key_id"]], "access_key_id"); - - if (args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); - - if (args_to_idx.contains("session_token")) - configuration.auth_settings.session_token = checkAndGetLiteralArgument(args[args_to_idx["session_token"]], "session_token"); - - configuration.auth_settings.no_sign_request = no_sign_request; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(url).getPath(), true); - } - - configuration.keys = {configuration.url.key}; -} - -void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Clone ast function, because we can modify its arguments like removing headers. - auto ast_copy = ast_function->clone(); - - /// Parse args - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - auto & args = args_func.at(0)->children; - - parseArgumentsImpl(args, context); -} - -void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) -{ - if (tryGetNamedCollectionWithOverrides(args, context)) - { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); - } - else - { - HTTPHeaderEntries tmp_headers; - size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); - - if (count == 0 || count > getMaxNumberOfArguments()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), count); - - auto structure_literal = std::make_shared(structure); - - /// s3(s3_url) - if (count == 1) - { - /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// s3(s3_url, format) or s3(s3_url, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - else if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// If there is NOSIGN, add format=auto before structure. - if (boost::iequals(second_arg, "NOSIGN")) - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// s3(source, format, structure) or - /// s3(source, access_key_id, secret_access_key) or - /// s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. 
- else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - args.push_back(structure_literal); - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - { - args[count - 1] = structure_literal; - } - else - { - /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - } - /// s3(source, format, structure, compression_method) or - /// s3(source, access_key_id, secret_access_key, format) or - /// s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - args[count - 1] = structure_literal; - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - { - args[count - 2] = structure_literal; - } - else - { - args.push_back(structure_literal); - } - } - /// s3(source, access_key_id, secret_access_key, format, structure) or - /// s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. - else if (count == 5) - { - auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(sedond_arg, "NOSIGN")) - { - args[count - 2] = structure_literal; - } - else - { - args[count - 1] = structure_literal; - } - } - /// s3(source, access_key_id, secret_access_key, format, structure, compression) - else if (count == 6) - { - args[count - 2] = structure_literal; - } - } -} - -ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - if (configuration.structure == "auto") - { - context->checkAccess(getSourceAccessType()); - configuration.update(context); - return StorageS3::getTableStructureFromData(configuration, std::nullopt, context); - } - - return parseColumnsListFromString(configuration.structure, context); -} - -bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); -} - -std::unordered_set TableFunctionS3::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageS3::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - -StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool /*is_insert_query*/) const -{ - S3::URI s3_uri (configuration.url); - - ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); - else if (!structure_hint.empty()) - columns = structure_hint; - else if (!cached_columns.empty()) - columns = cached_columns; - - StoragePtr storage = std::make_shared( - configuration, - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - String{}, - /// No format_settings for table function S3 - std::nullopt); - - storage->startup(); - - return storage; -} - - -class TableFunctionGCS : public TableFunctionS3 -{ -public: - static constexpr auto name = "gcs"; - std::string getName() 
const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "GCS"; } -}; - -class TableFunctionCOS : public TableFunctionS3 -{ -public: - static constexpr auto name = "cosn"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "COSN"; } -}; - -class TableFunctionOSS : public TableFunctionS3 -{ -public: - static constexpr auto name = "oss"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "OSS"; } -}; - - -void registerTableFunctionGCS(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Google Cloud Storage.)", - .examples{{"gcs", "SELECT * FROM gcs(url, hmac_key, hmac_secret)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -void registerTableFunctionS3(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on AWS S3.)", - .examples{{"s3", "SELECT * FROM s3(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - - -void registerTableFunctionCOS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -void registerTableFunctionOSS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h deleted file mode 100644 index fa73c1d313e..00000000000 --- a/src/TableFunctions/TableFunctionS3.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include - - -namespace DB -{ - -class Context; - -/* s3(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in S3. 
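
A hedged usage sketch of the signature above (the URL and credentials are placeholders):

    SELECT * FROM s3('https://bucket.s3.amazonaws.com/data/*.csv', '<access_key_id>', '<secret_access_key>', 'CSV');
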
- */ -class TableFunctionS3 : public ITableFunction -{ -public: - static constexpr auto name = "s3"; - static constexpr auto signature = " - url\n" - " - url, format\n" - " - url, format, structure\n" - " - url, format, structure, compression_method\n" - " - url, access_key_id, secret_access_key\n" - " - url, access_key_id, secret_access_key, session_token\n" - " - url, access_key_id, secret_access_key, format\n" - " - url, access_key_id, secret_access_key, session_token, format\n" - " - url, access_key_id, secret_access_key, format, structure\n" - " - url, access_key_id, secret_access_key, session_token, format, structure\n" - " - url, access_key_id, secret_access_key, format, structure, compression_method\n" - " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" - "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - - static size_t getMaxNumberOfArguments() { return 6; } - - String getName() const override - { - return name; - } - - virtual String getSignature() const - { - return signature; - } - - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); - -protected: - - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "S3"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - mutable StorageS3::Configuration configuration; - ColumnsDescription structure_hint; - -private: - - std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp deleted file mode 100644 index ce96f7f580b..00000000000 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionS3Cluster::executeImpl( - const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const -{ - StoragePtr storage; - ColumnsDescription columns; - bool structure_argument_was_provided = configuration.structure != "auto"; - - if (structure_argument_was_provided) - { - columns = parseColumnsListFromString(configuration.structure, context); - } - else if (!structure_hint.empty()) - { - columns = structure_hint; - } - - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this filename won't contains globs 
- storage = std::make_shared( - configuration, - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings for S3Cluster - /*distributed_processing=*/true); - } - else - { - storage = std::make_shared( - cluster_name, - configuration, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - context, - structure_argument_was_provided); - } - - storage->startup(); - - return storage; -} - - -void registerTableFunctionS3Cluster(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h deleted file mode 100644 index 718b0d90de8..00000000000 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * s3cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure, compression_method) - * A table function, which allows to process many files from S3 on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in S3 file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. - */ -class TableFunctionS3Cluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "s3Cluster"; - static constexpr auto signature = " - cluster, url\n" - " - cluster, url, format\n" - " - cluster, url, format, structure\n" - " - cluster, url, access_key_id, secret_access_key\n" - " - cluster, url, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, format\n" - " - cluster, url, access_key_id, secret_access_key, format, structure\n" - " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" - "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "S3Cluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 8c18c298f45..627d945fbf3 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -28,26 +28,17 @@ void registerTableFunctions() #endif #if USE_AWS_S3 - registerTableFunctionS3(factory); - registerTableFunctionS3Cluster(factory); - registerTableFunctionCOS(factory); - registerTableFunctionOSS(factory); - registerTableFunctionGCS(factory); - registerTableFunctionHudi(factory); + // registerTableFunctionS3Cluster(factory); + // registerTableFunctionHudi(factory); #if USE_PARQUET - registerTableFunctionDeltaLake(factory); + // registerTableFunctionDeltaLake(factory); #endif #if USE_AVRO - 
registerTableFunctionIceberg(factory); + // registerTableFunctionIceberg(factory); #endif #endif -#if USE_HDFS - registerTableFunctionHDFS(factory); - registerTableFunctionHDFSCluster(factory); -#endif - #if USE_HIVE registerTableFunctionHive(factory); #endif @@ -75,10 +66,8 @@ void registerTableFunctions() registerTableFunctionFormat(factory); registerTableFunctionExplain(factory); -#if USE_AZURE_BLOB_STORAGE - registerTableFunctionAzureBlobStorage(factory); - registerTableFunctionAzureBlobStorageCluster(factory); -#endif + registerTableFunctionObjectStorage(factory); + registerTableFunctionObjectStorageCluster(factory); } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index fae763e7dc8..cefb198273e 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -39,11 +39,6 @@ void registerTableFunctionIceberg(TableFunctionFactory & factory); #endif #endif -#if USE_HDFS -void registerTableFunctionHDFS(TableFunctionFactory & factory); -void registerTableFunctionHDFSCluster(TableFunctionFactory & factory); -#endif - #if USE_HIVE void registerTableFunctionHive(TableFunctionFactory & factory); #endif @@ -73,8 +68,8 @@ void registerTableFunctionFormat(TableFunctionFactory & factory); void registerTableFunctionExplain(TableFunctionFactory & factory); #if USE_AZURE_BLOB_STORAGE -void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory); -void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory); +void registerTableFunctionObjectStorage(TableFunctionFactory & factory); +void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory); #endif void registerTableFunctions(); diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 3cccd07c134..41218e41069 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -29,6 +29,8 @@ def cluster(): with_azurite=True, ) cluster.start() + container_client = cluster.blob_service_client.get_container_client("cont") + container_client.create_container() yield cluster finally: cluster.shutdown() @@ -129,8 +131,10 @@ def test_create_table_connection_string(cluster): node = cluster.instances["node"] azure_query( node, - f"CREATE TABLE test_create_table_conn_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}'," - f"'cont', 'test_create_connection_string', 'CSV')", + f""" + CREATE TABLE test_create_table_conn_string (key UInt64, data String) + Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_create_connection_string', 'CSV') + """, ) From 6d91d92601c04f160ba95a743fca270371b65eb8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 12 Feb 2024 18:17:22 +0100 Subject: [PATCH 015/651] Better --- src/Backups/BackupIO_AzureBlobStorage.cpp | 13 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 8 +- .../AzureBlobStorage/AzureObjectStorage.h | 4 +- .../Cached/CachedObjectStorage.cpp | 2 +- .../Cached/CachedObjectStorage.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.cpp | 6 +- src/Disks/ObjectStorages/IObjectStorage.h | 8 +- .../ObjectStorageIteratorAsync.cpp | 63 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 19 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 4 +- .../DataLakes/DeltaLakeMetadataParser.h | 2 +- src/Storages/DataLakes/HudiMetadataParser.h 
| 3 +- .../DataLakes/Iceberg/IcebergMetadata.cpp | 1 - .../DataLakes/Iceberg/IcebergMetadata.h | 2 +- .../ObjectStorage/AzureConfiguration.cpp | 11 + .../ObjectStorage/AzureConfiguration.h | 2 +- .../ObjectStorage/HDFSConfiguration.h | 2 +- .../ObjectStorage/ReadBufferIterator.cpp | 179 ++++++ .../ObjectStorage/ReadBufferIterator.h | 179 +----- .../ObjectStorage/ReadFromObjectStorage.h | 105 ---- .../ReadFromStorageObjectStorage.cpp | 94 +++ .../ReadFromStorageObjectStorage.h | 60 ++ src/Storages/ObjectStorage/S3Configuration.h | 2 +- ....h => StorageObejctStorageConfiguration.h} | 28 +- .../ObjectStorage/StorageObjectStorage.cpp | 91 +-- .../StorageObjectStorageCluster.cpp | 9 +- .../StorageObjectStorageCluster.h | 1 - .../StorageObjectStorageConfiguration.cpp | 40 ++ ....h => StorageObjectStorageQuerySettings.h} | 8 + .../ObjectStorage/StorageObjectStorageSink.h | 2 +- .../StorageObjectStorageSource.cpp | 539 +++++++++--------- .../StorageObjectStorageSource.h | 98 ++-- .../StorageObjectStorage_fwd_internal.h | 11 + .../registerStorageObjectStorage.cpp | 18 +- src/Storages/S3Queue/S3QueueSource.cpp | 17 +- src/Storages/S3Queue/S3QueueSource.h | 25 +- src/Storages/S3Queue/S3QueueTableMetadata.h | 2 +- src/Storages/S3Queue/StorageS3Queue.cpp | 32 +- src/Storages/S3Queue/StorageS3Queue.h | 1 - src/TableFunctions/ITableFunctionDataLake.h | 2 +- .../TableFunctionObjectStorage.cpp | 55 +- .../TableFunctionObjectStorageCluster.cpp | 14 +- 42 files changed, 973 insertions(+), 791 deletions(-) create mode 100644 src/Storages/ObjectStorage/ReadBufferIterator.cpp delete mode 100644 src/Storages/ObjectStorage/ReadFromObjectStorage.h create mode 100644 src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp create mode 100644 src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h rename src/Storages/ObjectStorage/{Configuration.h => StorageObejctStorageConfiguration.h} (73%) create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp rename src/Storages/ObjectStorage/{Settings.h => StorageObjectStorageQuerySettings.h} (86%) create mode 100644 src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index dc636f90be7..f12cc4c1d58 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -208,10 +208,15 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St /* for_disk_azure_blob_storage= */ true); } -void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) +void BackupWriterAzureBlobStorage::copyDataToFile( + const String & path_in_backup, + const CreateReadBufferFunction & create_read_buffer, + UInt64 start_pos, + UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); + copyDataToAzureBlobStorageFile( + create_read_buffer, start_pos, length, client, configuration.container, + path_in_backup, settings, threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; @@ -245,7 +250,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) object_storage->listObjects(key,children,/*max_keys*/0); if 
(children.empty())
        throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist");
-    return children[0]->metadata.size_bytes;
+    return children[0]->metadata->size_bytes;
 }
 
 std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
index 2ca44137442..bbbb5357505 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
@@ -128,15 +128,15 @@ bool AzureObjectStorage::exists(const StoredObject & object) const
     return false;
 }
 
-ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix) const
+ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
 {
     auto settings_ptr = settings.get();
     auto client_ptr = client.get();
 
-    return std::make_shared(path_prefix, client_ptr, settings_ptr->list_object_keys_size);
+    return std::make_shared(path_prefix, client_ptr, max_keys);
 }
 
-void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
+void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
 {
     auto client_ptr = client.get();
 
@@ -168,7 +168,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
         if (max_keys)
         {
-            int keys_left = max_keys - static_cast(children.size());
+            size_t keys_left = max_keys > children.size() ? max_keys - children.size() : 0; /// guard against unsigned underflow when children.size() exceeds max_keys
             if (keys_left <= 0)
                 break;
             options.PageSizeHint = keys_left;
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
index f16c35fb52c..31eb78924f9 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
@@ -69,9 +69,9 @@ public:
         SettingsPtr && settings_,
         const String & container_);
 
-    void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
+    void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
 
-    ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override;
+    ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;
 
     std::string getName() const override { return "AzureObjectStorage"; }
 
diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
index 1444f4c9c76..9f195b787a8 100644
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
@@ -180,7 +180,7 @@ std::unique_ptr CachedObjectStorage::cloneObjectStorage(
     return object_storage->cloneObjectStorage(new_namespace, config, config_prefix, context);
 }
 
-void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
+void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
 {
     object_storage->listObjects(path, children, max_keys);
 }
 
diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
index 437baead7be..ec116b63d01 100644
---
a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -80,7 +80,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; ObjectMetadata getObjectMetadata(const std::string & path) const override; diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index 78fbdcaddfa..d36ef4f414a 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -24,16 +24,16 @@ bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const return !files.empty(); } -void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, int) const +void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported"); } -ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix) const +ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { RelativePathsWithMetadata files; - listObjects(path_prefix, files, 0); + listObjects(path_prefix, files, max_keys); return std::make_shared(std::move(files)); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 7d354e6383d..4955b0e6924 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -54,11 +54,11 @@ struct ObjectMetadata struct RelativePathWithMetadata { String relative_path; - ObjectMetadata metadata; + std::optional metadata; RelativePathWithMetadata() = default; - RelativePathWithMetadata(String relative_path_, ObjectMetadata metadata_) + explicit RelativePathWithMetadata(String relative_path_, std::optional metadata_ = std::nullopt) : relative_path(std::move(relative_path_)) , metadata(std::move(metadata_)) {} @@ -111,9 +111,9 @@ public: /// /, /a, /a/b, /a/b/c, /a/b/c/d while exists will return true only for /a/b/c/d virtual bool existsOrHasAnyChild(const std::string & path) const; - virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const; + virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const; - virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const; + virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const; /// Get object metadata if supported. 
It should be possible to receive
    /// at least size of object
diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
index b7729623a64..62bdd0ed0c8 100644
--- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
+++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
@@ -14,27 +14,30 @@ namespace ErrorCodes
 
 void IObjectStorageIteratorAsync::nextBatch()
 {
     std::lock_guard lock(mutex);
-    if (!is_finished)
+    if (is_finished)
     {
+        current_batch.clear();
+        current_batch_iterator = current_batch.begin();
+    }
+    else
+    {
         if (!is_initialized)
         {
             outcome_future = scheduleBatch();
             is_initialized = true;
         }
 
-        BatchAndHasNext next_batch = outcome_future.get();
-        current_batch = std::move(next_batch.batch);
-        accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
-        current_batch_iterator = current_batch.begin();
-        if (next_batch.has_next)
-            outcome_future = scheduleBatch();
-        else
-            is_finished = true;
-    }
-    else
-    {
-        current_batch.clear();
+        chassert(outcome_future.valid());
+        auto [batch, has_next] = outcome_future.get();
+        current_batch = std::move(batch);
         current_batch_iterator = current_batch.begin();
+
+        accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
+
+        if (has_next)
+            outcome_future = scheduleBatch();
+        else
+            is_finished = true;
     }
 }
 
@@ -42,24 +45,10 @@ void IObjectStorageIteratorAsync::next()
 {
     std::lock_guard lock(mutex);
 
-    if (current_batch_iterator != current_batch.end())
-    {
+    if (current_batch_iterator == current_batch.end())
+        nextBatch();
+    else
         ++current_batch_iterator;
-    }
-    else if (!is_finished)
-    {
-        if (outcome_future.valid())
-        {
-            BatchAndHasNext next_batch = outcome_future.get();
-            current_batch = std::move(next_batch.batch);
-            accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
-            current_batch_iterator = current_batch.begin();
-            if (next_batch.has_next)
-                outcome_future = scheduleBatch();
-            else
-                is_finished = true;
-        }
-    }
 }
 
 std::future IObjectStorageIteratorAsync::scheduleBatch()
@@ -107,14 +96,14 @@ std::optional IObjectStorageIteratorAsync::getCurrent
     if (!is_initialized)
         nextBatch();
 
-    if (current_batch_iterator != current_batch.end())
+    if (current_batch_iterator == current_batch.end())
     {
-        auto temp_current_batch = current_batch;
-        nextBatch();
-        return temp_current_batch;
+        return std::nullopt;
     }
 
-    return std::nullopt;
+    auto temp_current_batch = std::move(current_batch);
+    nextBatch();
+    return temp_current_batch;
 }
 
 size_t IObjectStorageIteratorAsync::getAccumulatedSize() const
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index cc138c43c71..a9bd520e6e9 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -138,9 +138,10 @@ private:
             return outcome.GetResult().GetIsTruncated();
         }
 
-        throw
S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
-            quoteString(request.GetBucket()), quoteString(request.GetPrefix()),
-            backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
+        throw S3Exception(outcome.GetError().GetErrorType(),
+            "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
+            quoteString(request.GetBucket()), quoteString(request.GetPrefix()),
+            backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
     }
 
     std::shared_ptr client;
@@ -263,13 +264,13 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN
 }
 
-ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const
+ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
 {
     auto settings_ptr = s3_settings.get();
-    return std::make_shared(uri.bucket, path_prefix, client.get(), settings_ptr->list_object_keys_size);
+    return std::make_shared(uri.bucket, path_prefix, client.get(), max_keys);
 }
 
-void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
+void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
 {
     auto settings_ptr = s3_settings.get();
 
@@ -277,7 +278,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
     request.SetBucket(uri.bucket);
     request.SetPrefix(path);
     if (max_keys)
-        request.SetMaxKeys(max_keys);
+        request.SetMaxKeys(static_cast(max_keys));
     else
         request.SetMaxKeys(settings_ptr->list_object_keys_size);
 
@@ -305,10 +306,10 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
         if (max_keys)
         {
-            int keys_left = max_keys - static_cast(children.size());
+            size_t keys_left = max_keys > children.size() ? max_keys - children.size() : 0; /// guard against unsigned underflow when children.size() exceeds max_keys
             if (keys_left <= 0)
                 break;
-            request.SetMaxKeys(keys_left);
+            request.SetMaxKeys(static_cast(keys_left));
         }
 
         request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
index ab0fa5bed68..a6843a383e5 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
@@ -100,9 +100,9 @@ public:
         size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
         const WriteSettings & write_settings = {}) override;
 
-    void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
+    void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
 
-    ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override;
+    ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;
 
     /// Uses `DeleteObjectRequest`.
void removeObject(const StoredObject & object) override; diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h index f94024597d6..251ea3e3f15 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h index 2fc004595ca..72766a95876 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ b/src/Storages/DataLakes/HudiMetadataParser.h @@ -2,7 +2,8 @@ #include #include -#include +#include +#include namespace DB { diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp index 08cebb3f396..5543e60e7a7 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h index 92946e4192b..a289715848f 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/ObjectStorage/AzureConfiguration.cpp b/src/Storages/ObjectStorage/AzureConfiguration.cpp index ba3e796223a..04f6f26111b 100644 --- a/src/Storages/ObjectStorage/AzureConfiguration.cpp +++ b/src/Storages/ObjectStorage/AzureConfiguration.cpp @@ -89,6 +89,17 @@ StorageObjectStorageConfigurationPtr StorageAzureBlobConfiguration::clone() return configuration; } +StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) +{ + connection_url = other.connection_url; + is_connection_string = other.is_connection_string; + account_name = other.account_name; + account_key = other.account_key; + container = other.container; + blob_path = other.blob_path; + blobs_paths = other.blobs_paths; +} + AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) { const auto & context_settings = context->getSettingsRef(); diff --git a/src/Storages/ObjectStorage/AzureConfiguration.h b/src/Storages/ObjectStorage/AzureConfiguration.h index 40d718d7690..4f285128241 100644 --- a/src/Storages/ObjectStorage/AzureConfiguration.h +++ b/src/Storages/ObjectStorage/AzureConfiguration.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Storages/ObjectStorage/HDFSConfiguration.h b/src/Storages/ObjectStorage/HDFSConfiguration.h index f42cedf459d..aa45c634042 100644 --- a/src/Storages/ObjectStorage/HDFSConfiguration.h +++ b/src/Storages/ObjectStorage/HDFSConfiguration.h @@ -3,7 +3,7 @@ #if USE_HDFS -#include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp new file mode 100644 index 00000000000..dcdf36dbcf5 --- /dev/null +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -0,0 +1,179 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + +} + +ReadBufferIterator::ReadBufferIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const FileIterator & file_iterator_, + const std::optional & 
format_settings_, + const StorageObjectStorageSettings & query_settings_, + SchemaCache & schema_cache_, + ObjectInfos & read_keys_, + const ContextPtr & context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , file_iterator(file_iterator_) + , format_settings(format_settings_) + , query_settings(query_settings_) + , schema_cache(schema_cache_) + , read_keys(read_keys_) + , prev_read_keys_size(read_keys_.size()) +{ +} + +SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path) const +{ + auto source = fs::path(configuration->getDataSourceDescription()) / path; + return DB::getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); +} + +SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const +{ + Strings sources; + sources.reserve(read_keys.size()); + std::transform( + read_keys.begin(), read_keys.end(), + std::back_inserter(sources), + [&](const auto & elem) + { + return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; + }); + return DB::getKeysForSchemaCache(sources, configuration->format, format_settings, getContext()); +} + +std::optional ReadBufferIterator::tryGetColumnsFromCache( + const ObjectInfos::iterator & begin, + const ObjectInfos::iterator & end) +{ + if (!query_settings.schema_inference_use_cache) + return std::nullopt; + + for (auto it = begin; it < end; ++it) + { + const auto & object_info = (*it); + auto get_last_mod_time = [&] -> std::optional + { + if (object_info->metadata) + return object_info->metadata->last_modified->epochMicroseconds(); + else + { + object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + return object_info->metadata->last_modified->epochMicroseconds(); + } + }; + + auto cache_key = getKeyForSchemaCache(object_info->relative_path); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); + if (columns) + return columns; + } + + return std::nullopt; +} + +void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) +{ + if (query_settings.schema_inference_use_cache) + schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path), num_rows); +} + +void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) +{ + if (query_settings.schema_inference_use_cache + && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) + { + schema_cache.addColumns(getKeyForSchemaCache(current_object_info->relative_path), columns); + } +} + +void ReadBufferIterator::setResultingSchema(const ColumnsDescription & columns) +{ + if (query_settings.schema_inference_use_cache + && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + schema_cache.addManyColumns(getPathsForSchemaCache(), columns); + } +} + +String ReadBufferIterator::getLastFileName() const +{ + if (current_object_info) + return current_object_info->relative_path; + else + return ""; +} + +std::pair, std::optional> ReadBufferIterator::next() +{ + /// For default mode check cached columns for currently read keys on first iteration. 
+ if (first && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns}; + } + + current_object_info = file_iterator->next(0); + if (!current_object_info || current_object_info->relative_path.empty()) + { + if (first) + { + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, " + "because there are no files with provided path. " + "You must specify table structure manually", + configuration->format); + } + return {nullptr, std::nullopt}; + } + + first = false; + + /// File iterator could get new keys after new iteration, + /// check them in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT + && read_keys.size() > prev_read_keys_size) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + prev_read_keys_size = read_keys.size(); + if (columns_from_cache) + return {nullptr, columns_from_cache}; + } + else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + { + ObjectInfos paths = {current_object_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) + return {nullptr, columns_from_cache}; + } + + first = false; + + chassert(current_object_info->metadata); + std::unique_ptr read_buffer = object_storage->readObject( + StoredObject(current_object_info->relative_path), + getContext()->getReadSettings(), + {}, + current_object_info->metadata->size_bytes); + + read_buffer = wrapReadBufferWithCompressionMethod( + std::move(read_buffer), + chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + + return {std::move(read_buffer), std::nullopt}; +} + +} diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 248700e2edf..4e9b8cfcfca 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -1,197 +1,54 @@ #pragma once #include -#include +#include #include -#include -#include #include namespace DB { -namespace ErrorCodes -{ - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; -} - -template class ReadBufferIterator : public IReadBufferIterator, WithContext { public: - using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; - using FileIterator = std::shared_ptr; - using ObjectInfos = typename Storage::ObjectInfos; + using FileIterator = std::shared_ptr; ReadBufferIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, + ConfigurationPtr configuration_, const FileIterator & file_iterator_, const std::optional & format_settings_, + const StorageObjectStorageSettings & query_settings_, + SchemaCache & schema_cache_, ObjectInfos & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , object_storage(object_storage_) - , configuration(configuration_) - , file_iterator(file_iterator_) - , format_settings(format_settings_) - , storage_settings(StorageSettings::create(context_->getSettingsRef())) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } + const ContextPtr & context_); - std::pair, std::optional> next() override - { - /// For default mode check cached 
columns for currently read keys on first iteration. - if (first && storage_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; - } + std::pair, std::optional> next() override; - current_object_info = file_iterator->next(0); - if (current_object_info->relative_path.empty()) - { - if (first) - { - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, " - "because there are no files with provided path. " - "You must specify table structure manually", - configuration->format); - } - return {nullptr, std::nullopt}; - } + void setNumRowsToLastFile(size_t num_rows) override; - first = false; + void setSchemaToLastFile(const ColumnsDescription & columns) override; - /// File iterator could get new keys after new iteration, - /// check them in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT - && read_keys.size() > prev_read_keys_size) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; - } - else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - ObjectInfos paths = {current_object_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache}; - } + void setResultingSchema(const ColumnsDescription & columns) override; - first = false; - - std::unique_ptr read_buffer = object_storage->readObject( - StoredObject(current_object_info->relative_path), - getContext()->getReadSettings(), - {}, - current_object_info->metadata.size_bytes); - - read_buffer = wrapReadBufferWithCompressionMethod( - std::move(read_buffer), - chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), - static_cast(getContext()->getSettingsRef().zstd_window_log_max)); - - return {std::move(read_buffer), std::nullopt}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (storage_settings.schema_inference_use_cache) - { - Storage::getSchemaCache(getContext()).addNumRows( - getKeyForSchemaCache(current_object_info->relative_path), num_rows); - } - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (storage_settings.schema_inference_use_cache - && storage_settings.schema_inference_mode == SchemaInferenceMode::UNION) - { - Storage::getSchemaCache(getContext()).addColumns( - getKeyForSchemaCache(current_object_info->relative_path), columns); - } - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (storage_settings.schema_inference_use_cache - && storage_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - Storage::getSchemaCache(getContext()).addManyColumns(getPathsForSchemaCache(), columns); - } - } - - String getLastFileName() const override { return current_object_info->relative_path; } + String getLastFileName() const override; private: - SchemaCache::Key getKeyForSchemaCache(const String & path) const - { - auto source = fs::path(configuration->getDataSourceDescription()) / path; - return DB::getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); - } - - SchemaCache::Keys getPathsForSchemaCache() 
const - { - Strings sources; - sources.reserve(read_keys.size()); - std::transform( - read_keys.begin(), read_keys.end(), - std::back_inserter(sources), - [&](const auto & elem) - { - return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; - }); - return DB::getKeysForSchemaCache(sources, configuration->format, format_settings, getContext()); - } - + SchemaCache::Key getKeyForSchemaCache(const String & path) const; + SchemaCache::Keys getPathsForSchemaCache() const; std::optional tryGetColumnsFromCache( - const ObjectInfos::iterator & begin, - const ObjectInfos::iterator & end) - { - if (!storage_settings.schema_inference_use_cache) - return std::nullopt; - - auto & schema_cache = Storage::getSchemaCache(getContext()); - for (auto it = begin; it < end; ++it) - { - const auto & object_info = (*it); - auto get_last_mod_time = [&] -> std::optional - { - if (object_info->metadata.last_modified) - return object_info->metadata.last_modified->epochMicroseconds(); - else - { - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata.last_modified->epochMicroseconds(); - } - }; - - auto cache_key = getKeyForSchemaCache(object_info->relative_path); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; - } - - return std::nullopt; - } + const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); ObjectStoragePtr object_storage; - const Storage::ConfigurationPtr configuration; + const ConfigurationPtr configuration; const FileIterator file_iterator; const std::optional & format_settings; - const StorageObjectStorageSettings storage_settings; + const StorageObjectStorageSettings query_settings; + SchemaCache & schema_cache; ObjectInfos & read_keys; size_t prev_read_keys_size; - Storage::ObjectInfoPtr current_object_info; + ObjectInfoPtr current_object_info; bool first = true; }; } diff --git a/src/Storages/ObjectStorage/ReadFromObjectStorage.h b/src/Storages/ObjectStorage/ReadFromObjectStorage.h deleted file mode 100644 index 9cb77dcc25e..00000000000 --- a/src/Storages/ObjectStorage/ReadFromObjectStorage.h +++ /dev/null @@ -1,105 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace DB -{ - -template -class ReadFromStorageObejctStorage : public SourceStepWithFilter -{ -public: - using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; - - ReadFromStorageObejctStorage( - ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, - const String & name_, - const NamesAndTypesList & virtual_columns_, - const std::optional & format_settings_, - bool distributed_processing_, - ReadFromFormatInfo info_, - const bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = info_.source_header}) - , object_storage(object_storage_) - , configuration(configuration_) - , context(std::move(context_)) - , info(std::move(info_)) - , virtual_columns(virtual_columns_) - , format_settings(format_settings_) - , name(name_ + "Source") - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - , distributed_processing(distributed_processing_) - { - } - - std::string getName() const override { return name; } - - void applyFilters() override - { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if 
(filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); - } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override - { - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - getName(), object_storage, configuration, info, format_settings, - context, max_block_size, iterator_wrapper, need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); - } - -private: - ObjectStoragePtr object_storage; - Storage::ConfigurationPtr configuration; - ContextPtr context; - - const ReadFromFormatInfo info; - const NamesAndTypesList virtual_columns; - const std::optional format_settings; - const String name; - const bool need_only_count; - const size_t max_block_size; - const size_t num_streams; - const bool distributed_processing; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate) - { - if (!iterator_wrapper) - { - iterator_wrapper = Source::createFileIterator( - configuration, object_storage, distributed_processing, context, - predicate, virtual_columns, nullptr, context->getFileProgressCallback()); - } - } -}; - -} diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp new file mode 100644 index 00000000000..2c27c816078 --- /dev/null +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -0,0 +1,94 @@ +#include +#include +#include + +namespace DB +{ + +ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const String & name_, + const NamesAndTypesList & virtual_columns_, + const std::optional & format_settings_, + const StorageObjectStorageSettings & query_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + SchemaCache & schema_cache_, + const bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_, + CurrentMetrics::Metric metric_threads_count_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_) + : SourceStepWithFilter(DataStream{.header = info_.source_header}) + , WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , info(std::move(info_)) + , virtual_columns(virtual_columns_) + , format_settings(format_settings_) + , query_settings(query_settings_) + , schema_cache(schema_cache_) + , name(name_ + "Source") + , need_only_count(need_only_count_) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + , distributed_processing(distributed_processing_) + , metric_threads_count(metric_threads_count_) + , metric_threads_active(metric_threads_active_) + , metric_threads_scheduled(metric_threads_scheduled_) +{ +} + +void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predicate) +{ + if (!iterator_wrapper) + { + auto context = getContext(); + iterator_wrapper = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, distributed_processing, context, predicate, + virtual_columns, nullptr, query_settings.list_object_keys_size, context->getFileProgressCallback()); + } +} + +void 
ReadFromStorageObejctStorage::applyFilters() +{ + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + createIterator(predicate); +} + +void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + createIterator(nullptr); + auto context = getContext(); + + Pipes pipes; + for (size_t i = 0; i < num_streams; ++i) + { + auto threadpool = std::make_shared( + metric_threads_count, metric_threads_active, metric_threads_scheduled, /* max_threads */1); + + auto source = std::make_shared( + getName(), object_storage, configuration, info, format_settings, query_settings, + context, max_block_size, iterator_wrapper, need_only_count, schema_cache, std::move(threadpool)); + + pipes.emplace_back(std::move(source)); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + if (pipe.empty()) + pipe = Pipe(std::make_shared(info.source_header)); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); +} + +} diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h new file mode 100644 index 00000000000..f5e057d297f --- /dev/null +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h @@ -0,0 +1,60 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class ReadFromStorageObejctStorage : public SourceStepWithFilter, WithContext +{ +public: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + + ReadFromStorageObejctStorage( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const String & name_, + const NamesAndTypesList & virtual_columns_, + const std::optional & format_settings_, + const StorageObjectStorageSettings & query_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + SchemaCache & schema_cache_, + bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_, + CurrentMetrics::Metric metric_threads_count_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_); + + std::string getName() const override { return name; } + + void applyFilters() override; + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + +private: + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + std::shared_ptr iterator_wrapper; + + const ReadFromFormatInfo info; + const NamesAndTypesList virtual_columns; + const std::optional format_settings; + const StorageObjectStorageSettings query_settings; + SchemaCache & schema_cache; + const String name; + const bool need_only_count; + const size_t max_block_size; + const size_t num_streams; + const bool distributed_processing; + const CurrentMetrics::Metric metric_threads_count; + const CurrentMetrics::Metric metric_threads_active; + const CurrentMetrics::Metric metric_threads_scheduled; + + void createIterator(const ActionsDAG::Node * predicate); +}; + +} diff --git a/src/Storages/ObjectStorage/S3Configuration.h b/src/Storages/ObjectStorage/S3Configuration.h index 34f5735e02a..c953bc25c4e 100644 --- a/src/Storages/ObjectStorage/S3Configuration.h +++ b/src/Storages/ObjectStorage/S3Configuration.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include namespace DB { diff --git 
a/src/Storages/ObjectStorage/Configuration.h b/src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h similarity index 73% rename from src/Storages/ObjectStorage/Configuration.h rename to src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h index 708041980e3..427d6a8d453 100644 --- a/src/Storages/ObjectStorage/Configuration.h +++ b/src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h @@ -17,6 +17,12 @@ public: using Path = std::string; using Paths = std::vector; + static void initialize( + StorageObjectStorageConfiguration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure); + virtual Path getPath() const = 0; virtual void setPath(const Path & path) = 0; @@ -26,28 +32,24 @@ public: virtual String getDataSourceDescription() = 0; virtual String getNamespace() const = 0; - bool isPathWithGlobs() const { return getPath().find_first_of("*?{") != std::string::npos; } - bool isNamespaceWithGlobs() const { return getNamespace().find_first_of("*?{") != std::string::npos; } - - std::string getPathWithoutGlob() const { return getPath().substr(0, getPath().find_first_of("*?{")); } - - virtual bool withWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return getPath().find(PARTITION_ID_WILDCARD) != String::npos; - } + bool withWildcard() const; + bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } + bool isPathWithGlobs() const; + bool isNamespaceWithGlobs() const; + std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; virtual StorageObjectStorageConfigurationPtr clone() = 0; virtual ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT - virtual void fromNamedCollection(const NamedCollection & collection) = 0; - virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; - String format = "auto"; String compression_method = "auto"; String structure = "auto"; + +protected: + virtual void fromNamedCollection(const NamedCollection & collection) = 0; + virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; }; using StorageObjectStorageConfigurationPtr = std::shared_ptr; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 9250ab8ecbe..9a7260ea47c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -9,12 +9,12 @@ #include #include #include -#include -#include +#include +#include #include #include #include -#include +#include namespace DB @@ -154,34 +154,38 @@ void StorageObjectStorage::read( size_t max_block_size, size_t num_streams) { - if (partition_by && configuration->withWildcard()) + auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); + if (partition_by && query_configuration->withWildcard()) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned {} storage is not implemented yet", getName()); } - auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat( + const auto read_from_format_info = prepareReadingFromFormat( column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + const bool need_only_count = 
(query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; - auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); - auto reading = std::make_unique>( + auto read_step = std::make_unique( query_object_storage, query_configuration, getName(), virtual_columns, format_settings, + StorageSettings::create(local_context->getSettingsRef()), distributed_processing, std::move(read_from_format_info), + getSchemaCache(local_context), need_only_count, local_context, max_block_size, - num_streams); + num_streams, + StorageSettings::ObjectStorageThreads(), + StorageSettings::ObjectStorageThreadsActive(), + StorageSettings::ObjectStorageThreadsScheduled()); - query_plan.addStep(std::move(reading)); + query_plan.addStep(std::move(read_step)); } template @@ -191,35 +195,43 @@ SinkToStoragePtr StorageObjectStorage::write( ContextPtr local_context, bool /* async_insert */) { - auto insert_query = std::dynamic_pointer_cast(query); - auto partition_by_ast = insert_query - ? (insert_query->partition_by ? insert_query->partition_by : partition_by) - : nullptr; - bool is_partitioned_implementation = partition_by_ast && configuration->withWildcard(); + auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); + const auto sample_block = metadata_snapshot->getSampleBlock(); - auto sample_block = metadata_snapshot->getSampleBlock(); - auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); - - if (is_partitioned_implementation) + if (query_configuration->withWildcard()) { - return std::make_shared( - object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); + ASTPtr partition_by_ast = nullptr; + if (auto insert_query = std::dynamic_pointer_cast(query)) + { + if (insert_query->partition_by) + partition_by_ast = insert_query->partition_by; + else + partition_by_ast = partition_by; + } + + if (partition_by_ast) + { + return std::make_shared( + object_storage, query_configuration, format_settings, sample_block, local_context, partition_by_ast); + } } - if (configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs()) + if (query_configuration->withGlobs()) { throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "{} key '{}' contains globs, so the table is in readonly mode", - getName(), configuration->getPath()); + getName(), query_configuration->getPath()); } + const auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); if (!storage_settings.truncate_on_insert - && object_storage->exists(StoredObject(configuration->getPath()))) + && object_storage->exists(StoredObject(query_configuration->getPath()))) { if (storage_settings.create_new_file_on_insert) { - size_t index = configuration->getPaths().size(); - const auto & first_key = configuration->getPaths()[0]; + auto & paths = query_configuration->getPaths(); + size_t index = paths.size(); + const auto & first_key = paths[0]; auto pos = first_key.find_first_of('.'); String new_key; @@ -233,7 +245,7 @@ SinkToStoragePtr StorageObjectStorage::write( } while (object_storage->exists(StoredObject(new_key))); - configuration->getPaths().push_back(new_key); + paths.push_back(new_key); } else { @@ -242,12 +254,12 @@ SinkToStoragePtr StorageObjectStorage::write( "Object in bucket {} with key {} already exists. 
" "If you want to overwrite it, enable setting [engine_name]_truncate_on_insert, if you " "want to create a new file on each insert, enable setting [engine_name]_create_new_file_on_insert", - configuration->getNamespace(), configuration->getPaths().back()); + query_configuration->getNamespace(), query_configuration->getPaths().back()); } } return std::make_shared( - object_storage, configuration, format_settings, sample_block, local_context); + object_storage, query_configuration, format_settings, sample_block, local_context); } template @@ -257,7 +269,7 @@ void StorageObjectStorage::truncate( ContextPtr, TableExclusiveLockHolder &) { - if (configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs()) + if (configuration->withGlobs()) { throw Exception( ErrorCodes::DATABASE_ACCESS_DENIED, @@ -279,21 +291,18 @@ ColumnsDescription StorageObjectStorage::getTableStructureFromD const std::optional & format_settings, ContextPtr context) { - using Source = StorageObjectStorageSource; - ObjectInfos read_keys; - auto file_iterator = Source::createFileIterator( + const auto settings = StorageSettings::create(context->getSettingsRef()); + auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, object_storage, /* distributed_processing */false, - context, /* predicate */{}, /* virtual_columns */{}, &read_keys); + context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size); - ReadBufferIterator read_buffer_iterator( + ReadBufferIterator read_buffer_iterator( object_storage, configuration, file_iterator, - format_settings, read_keys, context); + format_settings, StorageSettings::create(context->getSettingsRef()), getSchemaCache(context), read_keys, context); - const bool retry = configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs(); - return readSchemaFromFormat( - configuration->format, format_settings, - read_buffer_iterator, retry, context); + const bool retry = configuration->withGlobs(); + return readSchemaFromFormat(configuration->format, format_settings, read_buffer_iterator, retry, context); } template class StorageObjectStorage; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 414932016f4..39cd5d8eca6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -11,8 +11,8 @@ #include #include #include -#include #include +#include #include #include @@ -82,10 +82,11 @@ void StorageObjectStorageCluster::ad template RemoteQueryExecutor::Extension -StorageObjectStorageCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr &) const +StorageObjectStorageCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { - auto iterator = std::make_shared( - object_storage, configuration, predicate, virtual_columns, nullptr); + const auto settings = StorageSettings::create(local_context->getSettingsRef()); + auto iterator = std::make_shared( + object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size); auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next(0)->relative_path; }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h 
b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index b1f9af14e03..aae8f704a73 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -21,7 +21,6 @@ class StorageObjectStorageCluster : public IStorageCluster { public: using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; StorageObjectStorageCluster( const String & cluster_name_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp new file mode 100644 index 00000000000..2d5760ed9d8 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -0,0 +1,40 @@ +#include + + +namespace DB +{ + +void StorageObjectStorageConfiguration::initialize( + StorageObjectStorageConfiguration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); +} + +bool StorageObjectStorageConfiguration::withWildcard() const +{ + static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos; +} + +bool StorageObjectStorageConfiguration::isPathWithGlobs() const +{ + return getPath().find_first_of("*?{") != std::string::npos; +} + +bool StorageObjectStorageConfiguration::isNamespaceWithGlobs() const +{ + return getNamespace().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorageConfiguration::getPathWithoutGlob() const +{ + return getPath().substr(0, getPath().find_first_of("*?{")); +} + +} diff --git a/src/Storages/ObjectStorage/Settings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h similarity index 86% rename from src/Storages/ObjectStorage/Settings.h rename to src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h index 015cf9bc01d..454da7c355f 100644 --- a/src/Storages/ObjectStorage/Settings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h @@ -23,6 +23,8 @@ struct StorageObjectStorageSettings bool create_new_file_on_insert; bool schema_inference_use_cache; SchemaInferenceMode schema_inference_mode; + bool skip_empty_files; + size_t list_object_keys_size; }; struct S3StorageSettings @@ -34,6 +36,8 @@ struct S3StorageSettings .create_new_file_on_insert = settings.s3_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, + .list_object_keys_size = settings.s3_list_object_keys_size, }; } @@ -53,6 +57,8 @@ struct AzureStorageSettings .create_new_file_on_insert = settings.azure_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure + .list_object_keys_size = settings.azure_list_object_keys_size, }; } @@ -72,6 +78,8 @@ struct HDFSStorageSettings .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = 
settings.s3_skip_empty_files, /// TODO: add setting for hdfs + .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 34ab8ebec66..a2d42d7fa9f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 9fc7925a6d1..f170a46112f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -28,20 +28,55 @@ namespace ErrorCodes extern const int CANNOT_COMPILE_REGEXP; } -template -std::shared_ptr::IIterator> -StorageObjectStorageSource::createFileIterator( - Storage::ConfigurationPtr configuration, +StorageObjectStorageSource::StorageObjectStorageSource( + String name_, + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const ReadFromFormatInfo & info, + std::optional format_settings_, + const StorageObjectStorageSettings & query_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + bool need_only_count_, + SchemaCache & schema_cache_, + std::shared_ptr reader_pool_) + : SourceWithKeyCondition(info.source_header, false) + , WithContext(context_) + , name(std::move(name_)) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , query_settings(query_settings_) + , max_block_size(max_block_size_) + , need_only_count(need_only_count_) + , read_from_format_info(info) + , create_reader_pool(reader_pool_) + , columns_desc(info.columns_description) + , file_iterator(file_iterator_) + , schema_cache(schema_cache_) + , create_reader_scheduler(threadPoolCallbackRunner(*create_reader_pool, "Reader")) +{ +} + +StorageObjectStorageSource::~StorageObjectStorageSource() +{ + create_reader_pool->wait(); +} + +std::shared_ptr StorageObjectStorageSource::createFileIterator( + ConfigurationPtr configuration, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, + size_t list_object_keys_size, std::function file_progress_callback) { if (distributed_processing) - return std::make_shared(local_context->getReadTaskCallback()); + return std::make_shared(local_context->getReadTaskCallback()); if (configuration->isNamespaceWithGlobs()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); @@ -49,25 +84,240 @@ StorageObjectStorageSource::createFileIterator( if (configuration->isPathWithGlobs()) { /// Iterate through disclosed globs and make a source for each file - return std::make_shared( - object_storage, configuration, predicate, virtual_columns, read_keys, file_progress_callback); + return std::make_shared( + object_storage, configuration, predicate, virtual_columns, local_context, read_keys, list_object_keys_size, file_progress_callback); } else { - return std::make_shared( + return std::make_shared( object_storage, configuration, virtual_columns, read_keys, 
file_progress_callback); } } -template -StorageObjectStorageSource::GlobIterator::GlobIterator( +void StorageObjectStorageSource::lazyInitialize(size_t processor) +{ + if (initialized) + return; + + reader = createReader(processor); + if (reader) + reader_future = createReaderAsync(processor); + initialized = true; +} + +Chunk StorageObjectStorageSource::generate() +{ + lazyInitialize(0); + + while (true) + { + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); + break; + } + + Chunk chunk; + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + total_rows_in_file += num_rows; + + size_t chunk_size = 0; + if (const auto * input_format = reader.getInputFormat()) + chunk_size = input_format->getApproxBytesReadForChunk(); + + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); + + const auto & object_info = reader.getObjectInfo(); + chassert(object_info.metadata); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, + read_from_format_info.requested_virtual_columns, + fs::path(configuration->getNamespace()) / reader.getRelativePath(), + object_info.metadata->size_bytes); + + return chunk; + } + + if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); + + total_rows_in_file = 0; + + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + create_reader_pool->wait(); + reader_future = createReaderAsync(); + } + + return {}; +} + +void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t num_rows) +{ + const auto cache_key = getKeyForSchemaCache( + fs::path(configuration->getDataSourceDescription()) / path, + configuration->format, + format_settings, + getContext()); + + schema_cache.addNumRows(cache_key, num_rows); +} + +std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) +{ + const auto cache_key = getKeyForSchemaCache( + fs::path(configuration->getDataSourceDescription()) / object_info->relative_path, + configuration->format, + format_settings, + getContext()); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info->metadata && object_info->metadata->last_modified + ? object_info->metadata->last_modified->epochMicroseconds() + : 0; + }; + return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); +} + +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +{ + ObjectInfoPtr object_info; + do + { + object_info = file_iterator->next(processor); + if (!object_info || object_info->relative_path.empty()) + return {}; + + if (!object_info->metadata) + object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + } + while (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0); + + QueryPipelineBuilder builder; + std::shared_ptr source; + std::unique_ptr read_buf; + + std::optional num_rows_from_cache = need_only_count + && getContext()->getSettingsRef().use_cache_for_count_from_files + ? 
tryGetNumRowsFromCache(object_info) + : std::nullopt; + + if (num_rows_from_cache) + { + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. + builder.init(Pipe(std::make_shared( + read_from_format_info.format_header, *num_rows_from_cache, max_block_size))); + } + else + { + const auto compression_method = chooseCompressionMethod(object_info->relative_path, configuration->compression_method); + const auto max_parsing_threads = need_only_count ? std::optional(1) : std::nullopt; + read_buf = createReadBuffer(object_info->relative_path, object_info->metadata->size_bytes); + + auto input_format = FormatFactory::instance().getInput( + configuration->format, *read_buf, read_from_format_info.format_header, + getContext(), max_block_size, format_settings, max_parsing_threads, + std::nullopt, /* is_remote_fs */ true, compression_method); + + if (key_condition) + input_format->setKeyCondition(key_condition); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_desc.hasDefaults()) + { + builder.addSimpleTransform( + [&](const Block & header) + { + return std::make_shared(header, columns_desc, *input_format, getContext()); + }); + } + + source = input_format; + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, read_from_format_info.requested_columns); + }); + + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + auto current_reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); + + return ReaderHolder( + object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); +} + +std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +{ + return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); +} + +std::unique_ptr StorageObjectStorageSource::createReadBuffer(const String & key, size_t object_size) +{ + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + read_settings.enable_filesystem_cache = false; + read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; + + const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; + read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; + + // Create a read buffer that will prefetch the first ~1 MB of the file. + // When reading lots of tiny files, this prefetching almost doubles the throughput. + // For bigger files, parallel reading is more useful. 
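// --- Illustrative aside (standalone C++, simplified names; not the ClickHouse
// API): the branch below reduces to "prefetch whole small objects, stream big
// ones". The object qualifies when it fits into two download buffers and the
// threadpool read method is in effect.

#include <cstddef>

enum class RemoteReadMethod { Threadpool, Synchronous };

// True when one asynchronous whole-object prefetch is expected to beat
// seek-based or parallel range reads.
bool shouldPrefetchWholeObject(std::size_t object_size, std::size_t max_download_buffer_size, RemoteReadMethod method)
{
    const bool object_too_small = object_size <= 2 * max_download_buffer_size;
    return object_too_small && method == RemoteReadMethod::Threadpool;
}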
+ if (use_prefetch) + { + LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); + + auto async_reader = object_storage->readObjects( + StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, read_settings); + + async_reader->setReadUntilEnd(); + if (read_settings.remote_fs_prefetch) + async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); + + return async_reader; + } + else + { + /// FIXME: this is inconsistent that readObject always reads synchronously ignoring read_method setting. + return object_storage->readObject(StoredObject(key), read_settings); + } +} + +StorageObjectStorageSource::GlobIterator::GlobIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, + ConfigurationPtr configuration_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, + ContextPtr context_, ObjectInfos * read_keys_, + size_t list_object_keys_size, std::function file_progress_callback_) - : object_storage(object_storage_) + : WithContext(context_) + , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) , read_keys(read_keys_) @@ -81,7 +331,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( { const auto key_with_globs = configuration_->getPath(); const auto key_prefix = configuration->getPathWithoutGlob(); - object_storage_iterator = object_storage->iterate(key_prefix); + object_storage_iterator = object_storage->iterate(key_prefix, list_object_keys_size); matcher = std::make_unique(makeRegexpPatternFromGlobs(key_with_globs)); if (matcher->ok()) @@ -113,13 +363,11 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } -template -StorageObjectStorageSource::ObjectInfoPtr -StorageObjectStorageSource::GlobIterator::next(size_t /* processor */) +ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor */) { std::lock_guard lock(next_mutex); - if (is_finished && index >= object_infos.size()) + if (is_finished) return {}; bool need_new_batch = object_infos.empty() || index >= object_infos.size(); @@ -130,9 +378,10 @@ StorageObjectStorageSource::GlobIterator::next(size_t /* proces while (new_batch.empty()) { auto result = object_storage_iterator->getCurrentBatchAndScheduleNext(); + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {}", result.has_value()); if (result.has_value()) { - new_batch = result.value(); + new_batch = std::move(result.value()); } else { @@ -169,7 +418,8 @@ StorageObjectStorageSource::GlobIterator::next(size_t /* proces { for (const auto & object_info : object_infos) { - file_progress_callback(FileProgress(0, object_info->metadata.size_bytes)); + chassert(object_info->metadata); + file_progress_callback(FileProgress(0, object_info->metadata->size_bytes)); } } } @@ -181,10 +431,9 @@ StorageObjectStorageSource::GlobIterator::next(size_t /* proces return object_infos[current_index]; } -template -StorageObjectStorageSource::KeysIterator::KeysIterator( +StorageObjectStorageSource::KeysIterator::KeysIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, + ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, std::function file_progress_callback_) @@ -199,15 +448,13 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( /// TODO: should we add metadata if we anyway fetch it if file_progress_callback is passed? 
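// --- Illustrative aside (standalone C++, simplified types; not the ClickHouse
// classes): KeysIterator::next() below hands a fixed key list to several
// reader threads lock-free through an atomic counter; each caller receives a
// distinct key or the end marker.

#include <atomic>
#include <cstddef>
#include <optional>
#include <string>
#include <vector>

class FixedKeysIterator
{
public:
    explicit FixedKeysIterator(std::vector<std::string> keys_) : keys(std::move(keys_)) {}

    // Safe to call concurrently from any number of threads.
    std::optional<std::string> next()
    {
        const std::size_t current = index.fetch_add(1, std::memory_order_relaxed);
        if (current >= keys.size())
            return std::nullopt;
        return keys[current];
    }

private:
    const std::vector<std::string> keys;
    std::atomic<std::size_t> index{0};
};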
for (auto && key : keys) { - auto object_info = std::make_shared(key, ObjectMetadata{}); + auto object_info = std::make_shared(key); read_keys_->emplace_back(object_info); } } } -template -StorageObjectStorageSource::ObjectInfoPtr -StorageObjectStorageSource::KeysIterator::next(size_t /* processor */) +ObjectInfoPtr StorageObjectStorageSource::KeysIterator::next(size_t /* processor */) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) @@ -225,240 +472,4 @@ StorageObjectStorageSource::KeysIterator::next(size_t /* proces return std::make_shared(key, metadata); } -template -Chunk StorageObjectStorageSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - read_from_format_info.requested_virtual_columns, - fs::path(configuration->getNamespace()) / reader.getRelativePath(), - reader.getObjectInfo().metadata.size_bytes); - - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
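// --- Illustrative aside (standalone C++; std::async stands in for the
// ClickHouse thread pool, which is an assumption of this sketch): both the old
// and the new generate() double-buffer readers, consuming the current file
// while the next reader is constructed in the background. make_reader is
// expected to return std::optional<Reader>, empty when the key list is
// exhausted.

#include <future>
#include <optional>

template <typename Reader, typename MakeReader>
void consumeAll(MakeReader make_reader)
{
    std::optional<Reader> reader = make_reader();
    if (!reader)
        return;
    auto next = std::async(std::launch::async, make_reader);

    while (reader)
    {
        // ... pull and process chunks from *reader here ...
        reader = next.get();   // pick up the reader prepared in the background
        if (reader)
            next = std::async(std::launch::async, make_reader);
    }
}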
- create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -template -void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(configuration->getDataSourceDescription()) / path; - auto cache_key = getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); - Storage::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -template -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) -{ - String source = fs::path(configuration->getDataSourceDescription()) / object_info->relative_path; - auto cache_key = getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = object_info->metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - else - { - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata.last_modified->epochMicroseconds(); - } - }; - return Storage::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -template -StorageObjectStorageSource::StorageObjectStorageSource( - String name_, - ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, - const ReadFromFormatInfo & info, - std::optional format_settings_, - ContextPtr context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , name(std::move(name_)) - , object_storage(object_storage_) - , configuration(configuration_) - , format_settings(format_settings_) - , max_block_size(max_block_size_) - , need_only_count(need_only_count_) - , read_from_format_info(info) - , columns_desc(info.columns_description) - , file_iterator(file_iterator_) - , create_reader_pool(StorageSettings::ObjectStorageThreads(), - StorageSettings::ObjectStorageThreadsActive(), - StorageSettings::ObjectStorageThreadsScheduled(), 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "Reader")) -{ - reader = createReader(); - if (reader) - reader_future = createReaderAsync(); -} - -template -StorageObjectStorageSource::~StorageObjectStorageSource() -{ - create_reader_pool.wait(); -} - -template -StorageObjectStorageSource::ReaderHolder -StorageObjectStorageSource::createReader(size_t processor) -{ - auto object_info = file_iterator->next(processor); - if (object_info->relative_path.empty()) - return {}; - - if (object_info->metadata.size_bytes == 0) - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count - && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(object_info) - : std::nullopt; - - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. 
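// --- Illustrative aside (standalone C++, not the actual ConstChunkGenerator
// interface): the comment above is the whole idea. A cached row count is
// surfaced as a series of chunk sizes capped at max_block_size, so no single
// chunk with millions of rows is ever materialized.

#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<std::uint64_t> splitRowCountIntoChunks(std::uint64_t total_rows, std::uint64_t max_block_size)
{
    std::vector<std::uint64_t> chunk_sizes;
    if (max_block_size == 0)
        return chunk_sizes;   // guard against an endless loop
    for (std::uint64_t emitted = 0; emitted < total_rows;)
    {
        const std::uint64_t rows = std::min(max_block_size, total_rows - emitted);
        chunk_sizes.push_back(rows);
        emitted += rows;
    }
    return chunk_sizes;
}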
- source = std::make_shared( - read_from_format_info.format_header, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - auto compression_method = chooseCompressionMethod( - object_info->relative_path, configuration->compression_method); - - read_buf = createReadBuffer(object_info->relative_path, object_info->metadata.size_bytes); - - auto input_format = FormatFactory::instance().getInput( - configuration->format, *read_buf, read_from_format_info.format_header, - getContext(), max_block_size, format_settings, max_parsing_threads, - std::nullopt, /* is_remote_fs */ true, compression_method); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { - return std::make_shared(header, columns_desc, *input_format, getContext()); - }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, read_from_format_info.requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{object_info, std::move(read_buf), - std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -template -std::future::ReaderHolder> -StorageObjectStorageSource::createReaderAsync(size_t processor) -{ - return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); -} - -template -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; - - // auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - // const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
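// --- Illustrative aside (reduced standalone C++; names are assumptions): the
// recurring refactor in this patch replaces the compile-time policy template
// (template <typename StorageSettings> class ...Source) with one runtime
// settings struct passed into the constructor, so a single non-template class
// serves S3, Azure and HDFS alike.

#include <cstddef>

struct QuerySettings            // runtime variant, one struct per query, any backend
{
    bool skip_empty_files;
    std::size_t list_object_keys_size;
};

class Source                    // replaces: template <typename StorageSettings> class Source
{
public:
    explicit Source(const QuerySettings & settings_) : settings(settings_) {}

    // Example of a former compile-time knob consulted at run time instead.
    bool shouldSkip(std::size_t object_size) const { return settings.skip_empty_files && object_size == 0; }

private:
    const QuerySettings settings;
};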
- // if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - // { - // LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); - - // auto async_reader = object_storage->readObjects( - // StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, read_settings); - - // async_reader->setReadUntilEnd(); - // if (read_settings.remote_fs_prefetch) - // async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - // return async_reader; - // } - // else - return object_storage->readObject(StoredObject(key), read_settings); -} - -template class StorageObjectStorageSource; -template class StorageObjectStorageSource; -template class StorageObjectStorageSource; - } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index f68a5d47456..0d6a6b71271 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -1,31 +1,19 @@ #pragma once -#include +#include +#include #include #include +#include +#include namespace DB { -template -class StorageObjectStorageSource : public ISource, WithContext +class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { friend class StorageS3QueueSource; public: - using Source = StorageObjectStorageSource; - using Storage = StorageObjectStorage; - using ObjectInfo = Storage::ObjectInfo; - using ObjectInfoPtr = Storage::ObjectInfoPtr; - using ObjectInfos = Storage::ObjectInfos; - - class IIterator : public WithContext - { - public: - virtual ~IIterator() = default; - - virtual size_t estimatedKeysCount() = 0; - virtual ObjectInfoPtr next(size_t processor) = 0; - }; - + class IIterator; class ReadTaskIterator; class GlobIterator; class KeysIterator; @@ -33,13 +21,16 @@ public: StorageObjectStorageSource( String name_, ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration, + ConfigurationPtr configuration, const ReadFromFormatInfo & info, std::optional format_settings_, + const StorageObjectStorageSettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - bool need_only_count_); + bool need_only_count_, + SchemaCache & schema_cache_, + std::shared_ptr reader_pool_); ~StorageObjectStorageSource() override; @@ -48,32 +39,35 @@ public: Chunk generate() override; static std::shared_ptr createFileIterator( - Storage::ConfigurationPtr configuration, + ConfigurationPtr configuration, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, + size_t list_object_keys_size, std::function file_progress_callback = {}); protected: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); - const String name; ObjectStoragePtr object_storage; - const Storage::ConfigurationPtr configuration; + const ConfigurationPtr configuration; const std::optional format_settings; + const StorageObjectStorageSettings query_settings; const UInt64 max_block_size; const bool need_only_count; const ReadFromFormatInfo read_from_format_info; - + const std::shared_ptr create_reader_pool; ColumnsDescription columns_desc; std::shared_ptr file_iterator; - size_t total_rows_in_file = 0; + SchemaCache & schema_cache; + bool initialized = false; - struct ReaderHolder + size_t total_rows_in_file = 
0; + LoggerPtr log = getLogger("StorageObjectStorageSource"); + + struct ReaderHolder : private boost::noncopyable { public: ReaderHolder( @@ -86,15 +80,15 @@ protected: , read_buf(std::move(read_buf_)) , source(std::move(source_)) , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } + , reader(std::move(reader_)) {} ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); } + explicit operator bool() const { return reader != nullptr; } + PullingPipelineExecutor * operator->() { return reader.get(); } + const PullingPipelineExecutor * operator->() const { return reader.get(); } + ReaderHolder & operator=(ReaderHolder && other) noexcept { /// The order of destruction is important. @@ -107,9 +101,6 @@ protected: return *this; } - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } const String & getRelativePath() const { return object_info->relative_path; } const ObjectInfo & getObjectInfo() const { return *object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } @@ -123,20 +114,29 @@ protected: }; ReaderHolder reader; - LoggerPtr log = getLogger("StorageObjectStorageSource"); - ThreadPool create_reader_pool; ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(size_t processor = 0); std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const String & key, size_t object_size); + + void addNumRowsToCache(const String & path, size_t num_rows); + std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); + void lazyInitialize(size_t processor); }; -template -class StorageObjectStorageSource::ReadTaskIterator : public IIterator +class StorageObjectStorageSource::IIterator +{ +public: + virtual ~IIterator() = default; + + virtual size_t estimatedKeysCount() = 0; + virtual ObjectInfoPtr next(size_t processor) = 0; +}; + +class StorageObjectStorageSource::ReadTaskIterator : public IIterator { public: explicit ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {} @@ -149,16 +149,17 @@ private: ReadTaskCallback callback; }; -template -class StorageObjectStorageSource::GlobIterator : public IIterator +class StorageObjectStorageSource::GlobIterator : public IIterator, WithContext { public: GlobIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, + ConfigurationPtr configuration_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, + ContextPtr context_, ObjectInfos * read_keys_, + size_t list_object_keys_size, std::function file_progress_callback_ = {}); ~GlobIterator() override = default; @@ -169,7 +170,7 @@ public: private: ObjectStoragePtr object_storage; - Storage::ConfigurationPtr configuration; + ConfigurationPtr configuration; ActionsDAGPtr filter_dag; NamesAndTypesList virtual_columns; @@ -189,13 +190,12 @@ private: std::function file_progress_callback; }; -template -class StorageObjectStorageSource::KeysIterator : public IIterator +class StorageObjectStorageSource::KeysIterator : public IIterator { public: KeysIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr 
configuration_, + ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, std::function file_progress_callback = {}); @@ -208,7 +208,7 @@ public: private: const ObjectStoragePtr object_storage; - const Storage::ConfigurationPtr configuration; + const ConfigurationPtr configuration; const NamesAndTypesList virtual_columns; const std::function file_progress_callback; const std::vector keys; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h new file mode 100644 index 00000000000..51be7419e1c --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h @@ -0,0 +1,11 @@ +#include + +namespace DB +{ + +using ConfigurationPtr = StorageObjectStorageConfigurationPtr; +using ObjectInfo = RelativePathWithMetadata; +using ObjectInfoPtr = std::shared_ptr; +using ObjectInfos = std::vector; + +} diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index bc9f93690f5..f7ab37490e1 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -8,18 +8,6 @@ namespace DB { -static void initializeConfiguration( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); - else - configuration.fromAST(engine_args, local_context, with_table_structure); -} - template static std::shared_ptr> createStorageObjectStorage( const StorageFactory::Arguments & args, @@ -82,7 +70,7 @@ void registerStorageAzure(StorageFactory & factory) { auto context = args.getLocalContext(); auto configuration = std::make_shared(); - initializeConfiguration(*configuration, args.engine_args, context, false); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); return createStorageObjectStorage(args, configuration, "Azure", context); }, { @@ -101,7 +89,7 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) { auto context = args.getLocalContext(); auto configuration = std::make_shared(); - initializeConfiguration(*configuration, args.engine_args, context, false); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); return createStorageObjectStorage(args, configuration, name, context); }, { @@ -136,7 +124,7 @@ void registerStorageHDFS(StorageFactory & factory) { auto context = args.getLocalContext(); auto configuration = std::make_shared(); - initializeConfiguration(*configuration, args.engine_args, context, false); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); return createStorageObjectStorage(args, configuration, "HDFS", context); }, { diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index bd34d1ec093..b64aa23d47c 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -35,7 +35,7 @@ StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( const std::string & key_, const ObjectMetadata & object_metadata_, Metadata::ProcessingNodeHolderPtr processing_holder_) - : Source::ObjectInfo(key_, object_metadata_) + : ObjectInfo(key_, object_metadata_) , 
processing_holder(processing_holder_) { } @@ -55,15 +55,15 @@ StorageS3QueueSource::FileIterator::FileIterator( if (sharded_processing) { for (const auto & id : metadata->getProcessingIdsForShard(current_shard)) - sharded_keys.emplace(id, std::deque{}); + sharded_keys.emplace(id, std::deque{}); } } -StorageS3QueueSource::Source::ObjectInfoPtr StorageS3QueueSource::FileIterator::next(size_t processor) +StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::next(size_t processor) { while (!shutdown_called) { - Source::ObjectInfoPtr val{nullptr}; + ObjectInfoPtr val{nullptr}; { std::unique_lock lk(sharded_keys_mutex, std::defer_lock); @@ -140,7 +140,7 @@ StorageS3QueueSource::Source::ObjectInfoPtr StorageS3QueueSource::FileIterator:: if (processing_holder) { - return std::make_shared(val->relative_path, val->metadata, processing_holder); + return std::make_shared(val->relative_path, val->metadata.value(), processing_holder); } else if (sharded_processing && metadata->getFileStatus(val->relative_path)->state == S3QueueFilesMetadata::FileStatus::State::Processing) @@ -161,7 +161,7 @@ size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() StorageS3QueueSource::StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -273,7 +273,8 @@ Chunk StorageS3QueueSource::generate() file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getRelativePath(), reader.getObjectInfo().metadata.size_bytes); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, requested_virtual_columns, reader.getRelativePath(), reader.getObjectInfo().metadata->size_bytes); return chunk; } } @@ -311,7 +312,7 @@ Chunk StorageS3QueueSource::generate() /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
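// --- Illustrative aside (standalone C++; ReaderPool is a stand-in type, not
// the ClickHouse ThreadPool interface): the hunk below switches
// create_reader_pool from a by-value member to a shared_ptr. The pool is now
// created by the caller and shared with each source, so shared ownership keeps
// it alive for as long as any holder may still call wait().

#include <memory>

struct ReaderPool { void wait() { /* block until queued tasks finish */ } };

class SourceLike
{
public:
    explicit SourceLike(std::shared_ptr<ReaderPool> pool_) : pool(std::move(pool_)) {}
    ~SourceLike() { pool->wait(); }   // safe: the pool cannot be destroyed first
private:
    std::shared_ptr<ReaderPool> pool;
};

// Usage: auto pool = std::make_shared<ReaderPool>(); SourceLike s1(pool), s2(pool);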
- internal_source->create_reader_pool.wait(); + internal_source->create_reader_pool->wait(); reader_future = internal_source->createReaderAsync(processing_id); } diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index fcf5c5c0160..2bdac7f2311 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include @@ -22,16 +22,19 @@ class StorageS3QueueSource : public ISource, WithContext { public: using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; using ConfigurationPtr = Storage::ConfigurationPtr; - using GlobIterator = Source::GlobIterator; + using GlobIterator = StorageObjectStorageSource::GlobIterator; using ZooKeeperGetter = std::function; using RemoveFileFunc = std::function; using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr; + using ReaderHolder = StorageObjectStorageSource::ReaderHolder; using Metadata = S3QueueFilesMetadata; + using ObjectInfo = RelativePathWithMetadata; + using ObjectInfoPtr = std::shared_ptr; + using ObjectInfos = std::vector; - struct S3QueueObjectInfo : public Source::ObjectInfo + struct S3QueueObjectInfo : public ObjectInfo { S3QueueObjectInfo( const std::string & key_, @@ -41,7 +44,7 @@ public: Metadata::ProcessingNodeHolderPtr processing_holder; }; - class FileIterator : public Source::IIterator + class FileIterator : public StorageObjectStorageSource::IIterator { public: FileIterator( @@ -53,7 +56,7 @@ public: /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - Source::ObjectInfoPtr next(size_t processor) override; + ObjectInfoPtr next(size_t processor) override; size_t estimatedKeysCount() override; @@ -66,14 +69,14 @@ public: const bool sharded_processing; const size_t current_shard; - std::unordered_map> sharded_keys; + std::unordered_map> sharded_keys; std::mutex sharded_keys_mutex; }; StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -97,7 +100,7 @@ private: const S3QueueAction action; const size_t processing_id; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; + const std::shared_ptr internal_source; const NamesAndTypesList requested_virtual_columns; const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; @@ -107,8 +110,8 @@ private: RemoveFileFunc remove_file_func; LoggerPtr log; - Source::ReaderHolder reader; - std::future reader_future; + ReaderHolder reader; + std::future reader_future; std::atomic initialized{false}; size_t processed_rows_from_file = 0; diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 942ce7973ef..70dd8f27d71 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,7 +3,7 @@ #if USE_AWS_S3 #include -#include +#include #include namespace DB diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index fa7132f705a..fc4ef77ebb9 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -341,16 +341,23 @@ std::shared_ptr StorageS3Queue::createSource( size_t max_block_size, ContextPtr local_context) { - auto internal_source = 
std::make_unique( + auto threadpool = std::make_shared(CurrentMetrics::ObjectStorageS3Threads, + CurrentMetrics::ObjectStorageS3ThreadsActive, + CurrentMetrics::ObjectStorageS3ThreadsScheduled, + /* max_threads */1); + auto internal_source = std::make_unique( getName(), object_storage, configuration, info, format_settings, + S3StorageSettings::create(local_context->getSettingsRef()), local_context, max_block_size, file_iterator, - false); + false, + Storage::getSchemaCache(local_context), + threadpool); auto file_deleter = [=, this](const std::string & path) mutable { @@ -555,25 +562,14 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const } } -std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr , const ActionsDAG::Node * predicate) +std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { - auto glob_iterator = std::make_unique(object_storage, configuration, predicate, virtual_columns, nullptr); - + auto settings = S3StorageSettings::create(local_context->getSettingsRef()); + auto glob_iterator = std::make_unique( + object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size); return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } -static void initializeConfiguration( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); - else - configuration.fromAST(engine_args, local_context, with_table_structure); -} - void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) { factory.registerStorage( @@ -585,7 +581,7 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = std::make_shared(); - initializeConfiguration(*configuration, args.engine_args, args.getContext(), false); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getContext(), false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. 
Settings from current diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 88f9bd65093..46a8b8d82c1 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -27,7 +27,6 @@ class StorageS3Queue : public IStorage, WithContext { public: using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; using ConfigurationPtr = Storage::ConfigurationPtr; StorageS3Queue( diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 884e1f5c4a2..0ffa1460d78 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -10,7 +10,7 @@ # include # include # include -#include +#include #include #include diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index d009a9347f3..de46c13af37 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -27,20 +27,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -static void initializeConfiguration( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); - else - configuration.fromAST(engine_args, local_context, with_table_structure); -} - template -ObjectStoragePtr TableFunctionObjectStorage::getObjectStorage(const ContextPtr & context, bool create_readonly) const +ObjectStoragePtr TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const { if (!object_storage) object_storage = configuration->createOrUpdateObjectStorage(context, create_readonly); @@ -48,7 +37,8 @@ ObjectStoragePtr TableFunctionObjectStorage -std::vector TableFunctionObjectStorage::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +std::vector TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const { auto & table_function_node = query_node_table_function->as(); auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); @@ -65,16 +55,18 @@ std::vector TableFunctionObjectStorage -void TableFunctionObjectStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +void TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) { Configuration::addStructureToArgs(args, structure, context); } template -void TableFunctionObjectStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) +void TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { configuration = std::make_shared(); - initializeConfiguration(*configuration, engine_args, local_context, true); + StorageObjectStorageConfiguration::initialize(*configuration, engine_args, local_context, true); } template @@ -91,7 +83,8 @@ void 
TableFunctionObjectStorage::par } template -ColumnsDescription TableFunctionObjectStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const +ColumnsDescription TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const { if (configuration->structure == "auto") { @@ -104,13 +97,15 @@ ColumnsDescription TableFunctionObjectStorage -bool TableFunctionObjectStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) +bool TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); } template -std::unordered_set TableFunctionObjectStorage::getVirtualsToCheckBeforeUsingStructureHint() const +std::unordered_set TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const { auto virtual_column_names = StorageObjectStorage::getVirtualColumnNames(); return {virtual_column_names.begin(), virtual_column_names.end()}; @@ -166,15 +161,33 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) factory.registerFunction>( { + .documentation = + { + .description=R"(The table function can be used to read the data stored on GCS.)", + .examples{{"gcs", "SELECT * FROM gcs(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, .allow_readonly = false }); factory.registerFunction>( { + .documentation = + { + .description=R"(The table function can be used to read the data stored on COSN.)", + .examples{{"cosn", "SELECT * FROM cosn(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, .allow_readonly = false }); factory.registerFunction>( { + .documentation = + { + .description=R"(The table function can be used to read the data stored on OSS.)", + .examples{{"oss", "SELECT * FROM oss(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, .allow_readonly = false }); #endif diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 1d27a857cea..8e6c96a3f2a 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -76,8 +75,8 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) factory.registerFunction( { .documentation = { - .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", - .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .description=R"(The table function can be used to read the data stored on S3 in parallel for many nodes in a specified cluster.)", + .examples{{"s3Cluster", "SELECT * FROM s3Cluster(cluster, url, format, structure)", ""}}}, .allow_readonly = false } ); @@ -95,7 +94,14 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) #endif #if USE_HDFS - factory.registerFunction(); + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on HDFS in 
parallel for many nodes in a specified cluster.)", + .examples{{"HDFSCluster", "SELECT * FROM HDFSCluster(cluster_name, uri, format)", ""}}}, + .allow_readonly = false + } + ); #endif } From 27a8bcc4383578b267ebcf0c8e0f65e83053c750 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 13 Feb 2024 20:16:37 +0100 Subject: [PATCH 016/651] Update ReadHelpers.cpp to fix failing style check --- src/IO/ReadHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 3f9ceef50d4..ddf932b98a6 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -540,7 +540,7 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) if (*buf.position() == '\r') { - ++buf.position(); // advance to \n after \r + ++buf.position(); } } } From 80b2276599024032ca656206042b2d5f1fdc1571 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 14 Feb 2024 10:38:20 +0100 Subject: [PATCH 017/651] fix style check --- src/IO/ReadHelpers.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index ddf932b98a6..af66cbb4cb5 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -510,7 +510,6 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) { next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end()); } - appendToStringOrVector(s, buf, next_pos); buf.position() = next_pos; @@ -539,9 +538,8 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) } if (*buf.position() == '\r') - { - ++buf.position(); - } + ++buf.position(); + } } @@ -1987,7 +1985,4 @@ void readTSVField(String & s, ReadBuffer & buf) template void readTSVField(String & s, ReadBuffer & buf); template void readTSVField(String & s, ReadBuffer & buf); - } - - From 84b0fe670a4d73cc0b5c26bb922e90369025dae6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Feb 2024 17:03:11 +0100 Subject: [PATCH 018/651] Refactor data lakes --- src/Backups/BackupIO_AzureBlobStorage.h | 2 +- .../registerBackupEngineAzureBlobStorage.cpp | 5 +- src/CMakeLists.txt | 7 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 6 +- ...jectStorageRemoteMetadataRestoreHelper.cpp | 28 ++-- src/Disks/ObjectStorages/IObjectStorage.h | 4 +- .../ObjectStorageIteratorAsync.cpp | 30 ++++- .../ObjectStorageIteratorAsync.h | 6 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- .../DataLakes/DeltaLakeMetadataParser.h | 26 ---- src/Storages/DataLakes/HudiMetadataParser.h | 18 --- src/Storages/DataLakes/IStorageDataLake.h | 98 -------------- .../DataLakes/Iceberg/StorageIceberg.cpp | 11 -- src/Storages/DataLakes/StorageDeltaLake.h | 20 --- src/Storages/DataLakes/StorageHudi.h | 20 --- src/Storages/DataLakes/registerDataLakes.cpp | 50 ------- .../Configuration.cpp} | 49 +++---- .../Configuration.h} | 11 +- .../ObjectStorage/DataLakes/Common.cpp | 28 ++++ src/Storages/ObjectStorage/DataLakes/Common.h | 15 +++ .../DataLakes/DeltaLakeMetadata.cpp} | 110 +++++++-------- .../DataLakes/DeltaLakeMetadata.h | 48 +++++++ .../DataLakes/HudiMetadata.cpp} | 55 ++++---- .../ObjectStorage/DataLakes/HudiMetadata.h | 51 +++++++ .../DataLakes/IDataLakeMetadata.h | 19 +++ .../DataLakes/IStorageDataLake.h} | 58 ++++---- .../DataLakes}/IcebergMetadata.cpp | 36 ++--- .../DataLakes}/IcebergMetadata.h | 40 +++--- .../DataLakes/registerDataLakeStorages.cpp | 83 ++++++++++++ .../ObjectStorage/HDFS/Configuration.cpp | 57 ++++++++ .../ObjectStorage/HDFS/Configuration.h | 45 +++++++ 
.../ObjectStorage/HDFSConfiguration.h | 81 ----------- .../ObjectStorage/ReadBufferIterator.cpp | 4 +- .../ReadFromStorageObjectStorage.cpp | 1 - .../Configuration.cpp} | 30 +++-- .../{S3Configuration.h => S3/Configuration.h} | 15 ++- .../ObjectStorage/StorageObjectStorage.cpp | 10 +- .../ObjectStorage/StorageObjectStorage.h | 5 +- .../StorageObjectStorageCluster.cpp | 2 +- .../StorageObjectStorageCluster.h | 3 + .../StorageObjectStorageConfiguration.cpp | 2 +- ....h => StorageObjectStorageConfiguration.h} | 3 +- .../StorageObjectStorageSink.cpp | 127 ++++++++++++++++++ .../ObjectStorage/StorageObjectStorageSink.h | 113 ++-------------- .../StorageObjectStorageSource.cpp | 33 ++++- .../StorageObjectStorageSource.h | 22 +-- .../StorageObjectStorage_fwd_internal.h | 3 +- .../registerStorageObjectStorage.cpp | 12 +- src/Storages/ObjectStorageConfiguration.h | 0 src/Storages/S3Queue/S3QueueTableMetadata.h | 2 +- src/Storages/S3Queue/StorageS3Queue.cpp | 9 +- .../StorageSystemSchemaInferenceCache.cpp | 2 +- src/TableFunctions/ITableFunctionDataLake.h | 76 +++++++---- src/TableFunctions/TableFunctionDeltaLake.cpp | 33 ----- src/TableFunctions/TableFunctionHudi.cpp | 31 ----- src/TableFunctions/TableFunctionIceberg.cpp | 37 ----- .../TableFunctionObjectStorage.cpp | 22 ++- .../TableFunctionObjectStorage.h | 13 +- .../TableFunctionObjectStorageCluster.cpp | 8 +- .../registerDataLakeTableFunctions.cpp | 69 ++++++++++ src/TableFunctions/registerTableFunctions.cpp | 3 +- src/TableFunctions/registerTableFunctions.h | 10 +- 62 files changed, 946 insertions(+), 873 deletions(-) delete mode 100644 src/Storages/DataLakes/DeltaLakeMetadataParser.h delete mode 100644 src/Storages/DataLakes/HudiMetadataParser.h delete mode 100644 src/Storages/DataLakes/IStorageDataLake.h delete mode 100644 src/Storages/DataLakes/Iceberg/StorageIceberg.cpp delete mode 100644 src/Storages/DataLakes/StorageDeltaLake.h delete mode 100644 src/Storages/DataLakes/StorageHudi.h delete mode 100644 src/Storages/DataLakes/registerDataLakes.cpp rename src/Storages/ObjectStorage/{AzureConfiguration.cpp => AzureBlob/Configuration.cpp} (92%) rename src/Storages/ObjectStorage/{AzureConfiguration.h => AzureBlob/Configuration.h} (88%) create mode 100644 src/Storages/ObjectStorage/DataLakes/Common.cpp create mode 100644 src/Storages/ObjectStorage/DataLakes/Common.h rename src/Storages/{DataLakes/DeltaLakeMetadataParser.cpp => ObjectStorage/DataLakes/DeltaLakeMetadata.cpp} (79%) create mode 100644 src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h rename src/Storages/{DataLakes/HudiMetadataParser.cpp => ObjectStorage/DataLakes/HudiMetadata.cpp} (68%) create mode 100644 src/Storages/ObjectStorage/DataLakes/HudiMetadata.h create mode 100644 src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h rename src/Storages/{DataLakes/Iceberg/StorageIceberg.h => ObjectStorage/DataLakes/IStorageDataLake.h} (61%) rename src/Storages/{DataLakes/Iceberg => ObjectStorage/DataLakes}/IcebergMetadata.cpp (96%) rename src/Storages/{DataLakes/Iceberg => ObjectStorage/DataLakes}/IcebergMetadata.h (76%) create mode 100644 src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp create mode 100644 src/Storages/ObjectStorage/HDFS/Configuration.cpp create mode 100644 src/Storages/ObjectStorage/HDFS/Configuration.h delete mode 100644 src/Storages/ObjectStorage/HDFSConfiguration.h rename src/Storages/ObjectStorage/{S3Configuration.cpp => S3/Configuration.cpp} (97%) rename src/Storages/ObjectStorage/{S3Configuration.h => S3/Configuration.h} (81%) rename 
src/Storages/ObjectStorage/{StorageObejctStorageConfiguration.h => StorageObjectStorageConfiguration.h} (99%) create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageSink.cpp delete mode 100644 src/Storages/ObjectStorageConfiguration.h delete mode 100644 src/TableFunctions/TableFunctionDeltaLake.cpp delete mode 100644 src/TableFunctions/TableFunctionHudi.cpp delete mode 100644 src/TableFunctions/TableFunctionIceberg.cpp create mode 100644 src/TableFunctions/registerDataLakeTableFunctions.cpp diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 99002c53769..9f1702cb3a3 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 9408c7ccdcf..c4c04bbc057 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #endif @@ -59,9 +59,6 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - if (!config.has(config_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no `{}` in config", config_prefix); - if (config.has(config_prefix + ".connection_string")) { configuration.connection_url = config.getString(config_prefix + ".connection_string"); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 50130e6abd0..118e0131b37 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -105,6 +105,7 @@ add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhous add_headers_and_sources(dbms Disks/IO) add_headers_and_sources(dbms Disks/ObjectStorages) +add_headers_and_sources(dbms Disks/ObjectStorages) if (TARGET ch_contrib::sqlite) add_headers_and_sources(dbms Databases/SQLite) endif() @@ -117,9 +118,11 @@ if (TARGET ch_contrib::nats_io) add_headers_and_sources(dbms Storages/NATS) endif() -add_headers_and_sources(dbms Storages/DataLakes) -add_headers_and_sources(dbms Storages/DataLakes/Iceberg) add_headers_and_sources(dbms Storages/ObjectStorage) +add_headers_and_sources(dbms Storages/ObjectStorage/AzureBlob) +add_headers_and_sources(dbms Storages/ObjectStorage/S3) +add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) +add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes) add_headers_and_sources(dbms Common/NamedCollections) if (TARGET ch_contrib::amqp_cpp) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index bbbb5357505..bcc75f91e2a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -323,10 +323,8 @@ void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { removeObjectIfExists(object); } - } - ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const { auto client_ptr = client.get(); @@ -338,9 +336,9 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c { result.attributes.emplace(); for (const auto & [key, value] : properties.Metadata) - (*result.attributes)[key] = value; + result.attributes[key] = 
value; } - result.last_modified.emplace(static_cast(properties.LastModified).time_since_epoch().count()); + result.last_modified = static_cast(properties.LastModified).time_since_epoch().count(); return result; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index cc9ee3db505..9f9efad9615 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -404,26 +404,20 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles( { for (const auto & key : keys) { - auto meta = source_object_storage->getObjectMetadata(key); - auto object_attributes = meta.attributes; + auto metadata = source_object_storage->getObjectMetadata(key); + auto object_attributes = metadata.attributes; String path; - if (object_attributes.has_value()) + /// Restore file if object has 'path' in metadata. + auto path_entry = object_attributes.find("path"); + if (path_entry == object_attributes.end()) { - /// Restore file if object has 'path' in metadata. - auto path_entry = object_attributes->find("path"); - if (path_entry == object_attributes->end()) - { - /// Such keys can remain after migration, we can skip them. - LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); - continue; - } - - path = path_entry->second; - } - else + /// Such keys can remain after migration, we can skip them. + LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); continue; + } + path = path_entry->second; disk->createDirectories(directoryPath(path)); auto object_key = ObjectStorageKey::createAsRelative(disk->object_key_prefix, shrinkKey(source_path, key)); @@ -435,7 +429,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles( source_object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, *disk->object_storage); auto tx = disk->metadata_storage->createTransaction(); - tx->addBlobToMetadata(path, object_key, meta.size_bytes); + tx->addBlobToMetadata(path, object_key, metadata.size_bytes); tx->commit(); LOG_TRACE(disk->log, "Restored file {}", path); @@ -490,7 +484,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject if (send_metadata) revision_counter = revision - 1; - auto object_attributes = *(source_object_storage->getObjectMetadata(object->relative_path).attributes); + auto object_attributes = source_object_storage->getObjectMetadata(object->relative_path).attributes; if (operation == rename) { auto from_path = object_attributes["from_path"]; diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 4955b0e6924..8a5352e71ca 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -47,8 +47,8 @@ using ObjectAttributes = std::map; struct ObjectMetadata { uint64_t size_bytes = 0; - std::optional last_modified; - std::optional attributes; + Poco::Timestamp last_modified; + ObjectAttributes attributes; }; struct RelativePathWithMetadata diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 62bdd0ed0c8..f441b18d59d 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -11,18 +11,26 @@ namespace 
ErrorCodes extern const int LOGICAL_ERROR; } +IObjectStorageIteratorAsync::IObjectStorageIteratorAsync( + CurrentMetrics::Metric threads_metric, + CurrentMetrics::Metric threads_active_metric, + CurrentMetrics::Metric threads_scheduled_metric, + const std::string & thread_name) + : list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1) + , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, thread_name)) +{ +} + void IObjectStorageIteratorAsync::nextBatch() { std::lock_guard lock(mutex); if (is_finished) { - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 3"); current_batch.clear(); current_batch_iterator = current_batch.begin(); } else { - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 4"); if (!is_initialized) { outcome_future = scheduleBatch(); @@ -30,13 +38,23 @@ void IObjectStorageIteratorAsync::nextBatch() } chassert(outcome_future.valid()); - auto [batch, has_next] = outcome_future.get(); - current_batch = std::move(batch); + BatchAndHasNext result; + try + { + result = outcome_future.get(); + } + catch (...) + { + is_finished = true; + throw; + } + + current_batch = std::move(result.batch); current_batch_iterator = current_batch.begin(); accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); - if (has_next) + if (result.has_next) outcome_future = scheduleBatch(); else is_finished = true; @@ -100,12 +118,10 @@ std::optional IObjectStorageIteratorAsync::getCurrent if (current_batch_iterator == current_batch.end()) { - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 2"); return std::nullopt; } auto temp_current_batch = std::move(current_batch); - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 1: {}", temp_current_batch.size()); nextBatch(); return temp_current_batch; } diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index 8d155f7ec8d..86e5feb3010 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -17,11 +17,7 @@ public: CurrentMetrics::Metric threads_metric, CurrentMetrics::Metric threads_active_metric, CurrentMetrics::Metric threads_scheduled_metric, - const std::string & thread_name) - : list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1) - , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, thread_name)) - { - } + const std::string & thread_name); void next() override; void nextBatch() override; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index d697d90c8a6..36f5bd73ca6 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -480,7 +480,7 @@ BlockIO InterpreterSystemQuery::execute() StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageAzureBlobStorage::getSchemaCache(getContext()).clear(); + StorageAzureBlob::getSchemaCache(getContext()).clear(); #endif break; } diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h deleted file mode 100644 index 251ea3e3f15..00000000000 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace DB -{ - -struct DeltaLakeMetadataParser -{ -public: - DeltaLakeMetadataParser(); - - Strings getFiles( - ObjectStoragePtr 
object_storage, - StorageObjectStorageConfigurationPtr configuration, - ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; -}; - -} diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h deleted file mode 100644 index 72766a95876..00000000000 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace DB -{ - -struct HudiMetadataParser -{ - Strings getFiles( - ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, ContextPtr context); -}; - -} diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h deleted file mode 100644 index 934bf227c42..00000000000 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -template -class IStorageDataLake : public StorageObjectStorage -{ -public: - static constexpr auto name = Name::name; - - using Storage = StorageObjectStorage; - using ConfigurationPtr = Storage::ConfigurationPtr; - - static StoragePtr create( - ConfigurationPtr base_configuration, - ContextPtr context, - const String & engine_name_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment_, - std::optional format_settings_, - bool /* attach */) - { - auto object_storage = base_configuration->createOrUpdateObjectStorage(context); - - auto configuration = base_configuration->clone(); - configuration->getPaths() = MetadataParser().getFiles(object_storage, configuration, context); - - return std::make_shared>( - base_configuration, configuration, object_storage, engine_name_, context, - table_id_, columns_, constraints_, comment_, format_settings_); - } - - String getName() const override { return name; } - - static ColumnsDescription getTableStructureFromData( - ObjectStoragePtr object_storage_, - ConfigurationPtr base_configuration, - const std::optional &, - ContextPtr local_context) - { - auto metadata = parseIcebergMetadata(object_storage_, base_configuration, local_context); - return ColumnsDescription(metadata->getTableSchema()); - } - - std::pair updateConfigurationAndGetCopy(ContextPtr local_context) override - { - std::lock_guard lock(Storage::configuration_update_mutex); - - auto new_object_storage = base_configuration->createOrUpdateObjectStorage(local_context); - bool updated = new_object_storage != nullptr; - if (updated) - Storage::object_storage = new_object_storage; - - auto new_keys = MetadataParser().getFiles(Storage::object_storage, base_configuration, local_context); - - if (updated || new_keys != Storage::configuration->getPaths()) - { - auto updated_configuration = base_configuration->clone(); - /// If metadata wasn't changed, we won't list data files again. - updated_configuration->getPaths() = new_keys; - Storage::configuration = updated_configuration; - } - return {Storage::configuration, Storage::object_storage}; - } - - template - explicit IStorageDataLake( - ConfigurationPtr base_configuration_, - Args &&... args) - : Storage(std::forward(args)...) 
- , base_configuration(base_configuration_) - { - } - -private: - ConfigurationPtr base_configuration; - LoggerPtr log; -}; - - -} - -#endif diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp deleted file mode 100644 index ad1a27c312b..00000000000 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include - -#if USE_AWS_S3 && USE_AVRO - -namespace DB -{ - - -} - -#endif diff --git a/src/Storages/DataLakes/StorageDeltaLake.h b/src/Storages/DataLakes/StorageDeltaLake.h deleted file mode 100644 index 07c2205d2df..00000000000 --- a/src/Storages/DataLakes/StorageDeltaLake.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -namespace DB -{ - -struct StorageDeltaLakeName -{ - static constexpr auto name = "DeltaLake"; -}; - -#if USE_AWS_S3 && USE_PARQUET -using StorageDeltaLakeS3 = IStorageDataLake; -#endif - -} diff --git a/src/Storages/DataLakes/StorageHudi.h b/src/Storages/DataLakes/StorageHudi.h deleted file mode 100644 index 3fd52c82d32..00000000000 --- a/src/Storages/DataLakes/StorageHudi.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -namespace DB -{ - -struct StorageHudiName -{ - static constexpr auto name = "Hudi"; -}; - -#if USE_AWS_S3 -using StorageHudiS3 = IStorageDataLake; -#endif - -} diff --git a/src/Storages/DataLakes/registerDataLakes.cpp b/src/Storages/DataLakes/registerDataLakes.cpp deleted file mode 100644 index 2647fbce39d..00000000000 --- a/src/Storages/DataLakes/registerDataLakes.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - - -namespace DB -{ - -#if USE_PARQUET -void registerStorageDeltaLake(StorageFactory & ) -{ - // factory.registerStorage( - // StorageDeltaLakeName::name, - // [&](const StorageFactory::Arguments & args) - // { - // auto configuration = std::make_shared(); - // return IStorageDataLake::create( - // configuration, args.getContext(), "deltaLake", args.table_id, args.columns, - // args.constraints, args.comment, std::nullopt, args.attach); - // }, - // { - // .supports_settings = false, - // .supports_schema_inference = true, - // .source_access_type = AccessType::S3, - // }); -} -#endif - -#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. 
- -void registerStorageIceberg(StorageFactory &) -{ - // REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) -} - -#endif - -void registerStorageHudi(StorageFactory &) -{ -} - -} - -#endif diff --git a/src/Storages/ObjectStorage/AzureConfiguration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp similarity index 92% rename from src/Storages/ObjectStorage/AzureConfiguration.cpp rename to src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 04f6f26111b..109918dfc8b 100644 --- a/src/Storages/ObjectStorage/AzureConfiguration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -1,4 +1,7 @@ -#include +#include + +#if USE_AZURE_BLOB_STORAGE + #include #include #include @@ -44,21 +47,19 @@ namespace return !candidate.starts_with("http"); } - bool containerExists(std::unique_ptr & blob_service_client, std::string container_name) + bool containerExists(Azure::Storage::Blobs::BlobServiceClient & blob_service_client, std::string container_name) { Azure::Storage::Blobs::ListBlobContainersOptions options; options.Prefix = container_name; options.PageSizeHint = 1; - auto containers_list_response = blob_service_client->ListBlobContainers(options); + auto containers_list_response = blob_service_client.ListBlobContainers(options); auto containers_list = containers_list_response.BlobContainers; - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; + auto it = std::find_if( + containers_list.begin(), containers_list.end(), + [&](const auto & c) { return c.Name == container_name; }); + return it != containers_list.end(); } } @@ -76,19 +77,6 @@ void StorageAzureBlobConfiguration::check(ContextPtr context) const context->getGlobalContext()->getRemoteHostFilter().checkURL(url_to_check); } -StorageObjectStorageConfigurationPtr StorageAzureBlobConfiguration::clone() -{ - auto configuration = std::make_shared(); - configuration->connection_url = connection_url; - configuration->is_connection_string = is_connection_string; - configuration->account_name = account_name; - configuration->account_key = account_key; - configuration->container = container; - configuration->blob_path = blob_path; - configuration->blobs_paths = blobs_paths; - return configuration; -} - StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) { connection_url = other.connection_url; @@ -98,6 +86,10 @@ StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureB container = other.container; blob_path = other.blob_path; blobs_paths = other.blobs_paths; + + format = other.format; + compression_method = other.compression_method; + structure = other.structure; } AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) @@ -127,7 +119,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) { auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); - bool container_exists = containerExists(blob_service_client, container); + bool container_exists = containerExists(*blob_service_client, container); if (!container_exists) { @@ -140,10 +132,11 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) try { result->CreateIfNotExists(); - } catch (const Azure::Storage::StorageException & e) + } + catch (const 
Azure::Storage::StorageException & e) { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) + if (e.StatusCode != Azure::Core::Http::HttpStatusCode::Conflict + || e.ReasonPhrase != "The specified container already exists.") { throw; } @@ -169,7 +162,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) blob_service_client = std::make_unique(connection_url); } - bool container_exists = containerExists(blob_service_client, container); + bool container_exists = containerExists(*blob_service_client, container); std::string final_url; size_t pos = connection_url.find('?'); @@ -460,3 +453,5 @@ void StorageAzureBlobConfiguration::addStructureToArgs(ASTs & args, const String } } + +#endif diff --git a/src/Storages/ObjectStorage/AzureConfiguration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h similarity index 88% rename from src/Storages/ObjectStorage/AzureConfiguration.h rename to src/Storages/ObjectStorage/AzureBlob/Configuration.h index 4f285128241..deeb365d012 100644 --- a/src/Storages/ObjectStorage/AzureConfiguration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -1,6 +1,11 @@ #pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + #include -#include +#include namespace DB { @@ -26,8 +31,8 @@ public: String getNamespace() const override { return container; } void check(ContextPtr context) const override; - StorageObjectStorageConfigurationPtr clone() override; ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; @@ -52,3 +57,5 @@ protected: }; } + +#endif diff --git a/src/Storages/ObjectStorage/DataLakes/Common.cpp b/src/Storages/ObjectStorage/DataLakes/Common.cpp new file mode 100644 index 00000000000..5f0138078d4 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Common.cpp @@ -0,0 +1,28 @@ +#include "Common.h" +#include +#include +#include + +namespace DB +{ + +std::vector listFiles( + const IObjectStorage & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix) +{ + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage.listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) + { + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); + } + LOG_TRACE(getLogger("DataLakeCommon"), "Listed {} files", res.size()); + return res; +} + +} diff --git a/src/Storages/ObjectStorage/DataLakes/Common.h b/src/Storages/ObjectStorage/DataLakes/Common.h new file mode 100644 index 00000000000..ae3767f2eec --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Common.h @@ -0,0 +1,15 @@ +#pragma once +#include + +namespace DB +{ + +class IObjectStorage; +class StorageObjectStorageConfiguration; + +std::vector listFiles( + const IObjectStorage & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix); + +} diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp 
b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp similarity index 79% rename from src/Storages/DataLakes/DeltaLakeMetadataParser.cpp rename to src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 55ff8fefdd5..903558b73ab 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include "config.h" #include @@ -15,8 +15,7 @@ #include #include #include - -namespace fs = std::filesystem; +#include namespace DB { @@ -27,12 +26,23 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -struct DeltaLakeMetadataParser::Impl +struct DeltaLakeMetadata::Impl final : private WithContext { + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + /** * Useful links: * - https://github.com/delta-io/delta/blob/master/PROTOCOL.md#data-files */ + Impl(ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + { + } /** * DeltaLake tables store metadata files and data files. @@ -62,13 +72,10 @@ struct DeltaLakeMetadataParser::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. */ - std::set processMetadataFiles( - ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration, - ContextPtr context) + std::set processMetadataFiles() { std::set result_files; - const auto checkpoint_version = getCheckpointIfExists(result_files, object_storage, configuration, context); + const auto checkpoint_version = getCheckpointIfExists(result_files); if (checkpoint_version) { @@ -76,12 +83,12 @@ struct DeltaLakeMetadataParser::Impl while (true) { const auto filename = withPadding(++current_version) + metadata_file_suffix; - const auto file_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / filename; + const auto file_path = fs::path(configuration->getPath()) / deltalake_metadata_directory / filename; if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files, object_storage, configuration, context); + processMetadataFile(file_path, result_files); } LOG_TRACE( @@ -90,33 +97,14 @@ struct DeltaLakeMetadataParser::Impl } else { - const auto keys = listFiles(object_storage, configuration, deltalake_metadata_directory, metadata_file_suffix); + const auto keys = listFiles(*object_storage, *configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files, object_storage, configuration, context); + processMetadataFile(key, result_files); } return result_files; } - std::vector listFiles( - const ObjectStoragePtr & object_storage, - const StorageObjectStorageConfiguration & configuration, - const String & prefix, const String & suffix) - { - auto key = std::filesystem::path(configuration.getPath()) / prefix; - RelativePathsWithMetadata files_with_metadata; - object_storage->listObjects(key, files_with_metadata, 0); - Strings res; - for (const auto & file_with_metadata : files_with_metadata) - { - const auto & filename = file_with_metadata->relative_path; - if (filename.ends_with(suffix)) - res.push_back(filename); - } - LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - return res; - } - /** * Example of content of a 
single .json metadata file: * " @@ -146,14 +134,9 @@ struct DeltaLakeMetadataParser::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile( - const String & key, - std::set & result, - ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration, - ContextPtr context) + void processMetadataFile(const String & key, std::set & result) { - auto read_settings = context->getReadSettings(); + auto read_settings = getContext()->getReadSettings(); auto buf = object_storage->readObject(StoredObject(key), read_settings); char c; @@ -176,12 +159,12 @@ struct DeltaLakeMetadataParser::Impl if (json.has("add")) { const auto path = json["add"]["path"].getString(); - result.insert(fs::path(configuration.getPath()) / path); + result.insert(fs::path(configuration->getPath()) / path); } else if (json.has("remove")) { const auto path = json["remove"]["path"].getString(); - result.erase(fs::path(configuration.getPath()) / path); + result.erase(fs::path(configuration->getPath()) / path); } } } @@ -199,17 +182,14 @@ struct DeltaLakeMetadataParser::Impl * * We need to get "version", which is the version of the checkpoint we need to read. */ - size_t readLastCheckpointIfExists( - ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration, - ContextPtr context) const + size_t readLastCheckpointIfExists() { - const auto last_checkpoint_file = fs::path(configuration.getPath()) / deltalake_metadata_directory / "_last_checkpoint"; + const auto last_checkpoint_file = fs::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; if (!object_storage->exists(StoredObject(last_checkpoint_file))) return 0; String json_str; - auto read_settings = context->getReadSettings(); + auto read_settings = getContext()->getReadSettings(); auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings); readJSONObjectPossiblyInvalid(json_str, *buf); @@ -260,21 +240,18 @@ struct DeltaLakeMetadataParser::Impl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists( - std::set & result, - ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration, - ContextPtr context) + size_t getCheckpointIfExists(std::set & result) { - const auto version = readLastCheckpointIfExists(object_storage, configuration, context); + const auto version = readLastCheckpointIfExists(); if (!version) return 0; const auto checkpoint_filename = withPadding(version) + ".checkpoint.parquet"; - const auto checkpoint_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / checkpoint_filename; + const auto checkpoint_path = fs::path(configuration->getPath()) / deltalake_metadata_directory / checkpoint_filename; LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); + auto context = getContext(); auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings); auto format_settings = getFormatSettings(context); @@ -334,7 +311,7 @@ struct DeltaLakeMetadataParser::Impl if (filename.empty()) continue; LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(fs::path(configuration.getPath()) / filename); + const auto [_, inserted] = result.insert(fs::path(configuration->getPath()) / filename); if (!inserted) throw 
Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); } @@ -345,15 +322,22 @@ struct DeltaLakeMetadataParser::Impl LoggerPtr log = getLogger("DeltaLakeMetadataParser"); }; -DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) {} - -Strings DeltaLakeMetadataParser::getFiles( - ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, - ContextPtr context) +DeltaLakeMetadata::DeltaLakeMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : impl(std::make_unique(object_storage_, configuration_, context_)) { - auto result = impl->processMetadataFiles(object_storage, *configuration, context); - return Strings(result.begin(), result.end()); +} + +Strings DeltaLakeMetadata::getDataFiles() const +{ + if (!data_files.empty()) + return data_files; + + auto result = impl->processMetadataFiles(); + data_files = Strings(result.begin(), result.end()); + return data_files; } } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h new file mode 100644 index 00000000000..1a5bb85586a --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class DeltaLakeMetadata final : public IDataLakeMetadata, private WithContext +{ +public: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + + static constexpr auto name = "DeltaLake"; + + DeltaLakeMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_); + + Strings getDataFiles() const override; + + NamesAndTypesList getTableSchema() const override { return {}; } + + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * deltalake_metadata = dynamic_cast(&other); + return deltalake_metadata && getDataFiles() == deltalake_metadata->getDataFiles(); + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context) + { + return std::make_unique(object_storage, configuration, local_context); + } + +private: + struct Impl; + const std::shared_ptr impl; + mutable Strings data_files; +}; + +} diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp similarity index 68% rename from src/Storages/DataLakes/HudiMetadataParser.cpp rename to src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp index 8571c035b32..91a586ccbf9 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp @@ -1,4 +1,5 @@ -#include +#include +#include #include #include #include @@ -40,33 +41,10 @@ namespace ErrorCodes * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group. * Each file group is identified by File Id. 
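 * Illustrative example (editor's note; the concrete file name below is invented, and the
 * <file_id>_<write_token>_<commit_time>.<format> naming convention it follows is an
 * assumption based on Hudi's documented base-file layout):
 *     partition_a/00000001-0000-0000-0000-000000000000-0_1-21-212_20240131151509.parquet
 * Splitting the file name on '_' yields file_parts[0] (the File Id) and file_parts[2]
 * (the commit timestamp); for each (partition, file id) pair only the key with the
 * largest timestamp is kept, which is what getDataFilesImpl() below implements.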
*/ -std::vector listFiles( - const ObjectStoragePtr & object_storage, - const StorageObjectStorageConfiguration & configuration, - const String & prefix, const String & suffix) +Strings HudiMetadata::getDataFilesImpl() const { - auto key = std::filesystem::path(configuration.getPath()) / prefix; - RelativePathsWithMetadata files_with_metadata; - object_storage->listObjects(key, files_with_metadata, 0); - Strings res; - for (const auto & file_with_metadata : files_with_metadata) - { - const auto & filename = file_with_metadata->relative_path; - if (filename.ends_with(suffix)) - res.push_back(filename); - } - LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - return res; -} - -Strings HudiMetadataParser::getFiles( - ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, - ContextPtr) -{ - auto log = getLogger("HudiMetadataParser"); - - const auto keys = listFiles(object_storage, *configuration, "", Poco::toLower(configuration->format)); + auto log = getLogger("HudiMetadata"); + const auto keys = listFiles(*object_storage, *configuration, "", Poco::toLower(configuration->format)); using Partition = std::string; using FileID = std::string; @@ -75,7 +53,7 @@ Strings HudiMetadataParser::getFiles( String key; UInt64 timestamp = 0; }; - std::unordered_map> data_files; + std::unordered_map> files; for (const auto & key : keys) { @@ -90,7 +68,7 @@ Strings HudiMetadataParser::getFiles( const auto & file_id = file_parts[0]; const auto timestamp = parse(file_parts[2]); - auto & file_info = data_files[partition][file_id]; + auto & file_info = files[partition][file_id]; if (file_info.timestamp == 0 || file_info.timestamp < timestamp) { file_info.key = key; @@ -99,7 +77,7 @@ Strings HudiMetadataParser::getFiles( } Strings result; - for (auto & [partition, partition_data] : data_files) + for (auto & [partition, partition_data] : files) { LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); for (auto & [file_id, file_data] : partition_data) @@ -108,4 +86,21 @@ Strings HudiMetadataParser::getFiles( return result; } +HudiMetadata::HudiMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) +{ +} + +Strings HudiMetadata::getDataFiles() const +{ + if (data_files.empty()) + data_files = getDataFilesImpl(); + return data_files; +} + } diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h new file mode 100644 index 00000000000..ee8b1ea4978 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class HudiMetadata final : public IDataLakeMetadata, private WithContext +{ +public: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + + static constexpr auto name = "Hudi"; + + HudiMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_); + + Strings getDataFiles() const override; + + NamesAndTypesList getTableSchema() const override { return {}; } + + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * hudi_metadata = dynamic_cast(&other); + return hudi_metadata && getDataFiles() == hudi_metadata->getDataFiles(); + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + 
ConfigurationPtr configuration, + ContextPtr local_context) + { + return std::make_unique(object_storage, configuration, local_context); + } + +private: + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + mutable Strings data_files; + + Strings getDataFilesImpl() const; +}; + +} diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h new file mode 100644 index 00000000000..a2bd5adb947 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -0,0 +1,19 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class IDataLakeMetadata : boost::noncopyable +{ +public: + virtual ~IDataLakeMetadata() = default; + virtual Strings getDataFiles() const = 0; + virtual NamesAndTypesList getTableSchema() const = 0; + virtual bool operator==(const IDataLakeMetadata & other) const = 0; +}; +using DataLakeMetadataPtr = std::unique_ptr; + +} diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h similarity index 61% rename from src/Storages/DataLakes/Iceberg/StorageIceberg.h rename to src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index bca6e3c868f..95196cdd000 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -5,11 +5,13 @@ #if USE_AWS_S3 && USE_AVRO #include -#include #include #include #include -#include +#include +#include +#include +#include #include @@ -19,13 +21,10 @@ namespace DB /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Right now it's implemented on top of StorageS3 and right now it doesn't support /// many Iceberg features like schema evolution, partitioning, positional and equality deletes. -/// TODO: Implement Iceberg as a separate storage using IObjectStorage -/// (to support all object storages, not only S3) and add support for missing Iceberg features. -template -class StorageIceberg : public StorageObjectStorage +template +class IStorageDataLake final : public StorageObjectStorage { public: - static constexpr auto name = "Iceberg"; using Storage = StorageObjectStorage; using ConfigurationPtr = Storage::ConfigurationPtr; @@ -41,12 +40,14 @@ public: bool attach) { auto object_storage = base_configuration->createOrUpdateObjectStorage(context); - std::unique_ptr metadata; + DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; + ConfigurationPtr configuration = base_configuration->clone(); try { - metadata = parseIcebergMetadata(object_storage, base_configuration, context); + metadata = DataLakeMetadata::create(object_storage, base_configuration, context); schema_from_metadata = metadata->getTableSchema(); + configuration->getPaths() = metadata->getDataFiles(); } catch (...) { @@ -55,17 +56,14 @@ public: tryLogCurrentException(__PRETTY_FUNCTION__); } - auto configuration = base_configuration->clone(); - configuration->getPaths() = metadata->getDataFiles(); - - return std::make_shared>( + return std::make_shared>( base_configuration, std::move(metadata), configuration, object_storage, engine_name_, context, table_id_, columns_.empty() ? 
ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } - String getName() const override { return name; } + String getName() const override { return DataLakeMetadata::name; } static ColumnsDescription getTableStructureFromData( ObjectStoragePtr object_storage_, @@ -73,7 +71,7 @@ public: const std::optional &, ContextPtr local_context) { - auto metadata = parseIcebergMetadata(object_storage_, base_configuration, local_context); + auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); return ColumnsDescription(metadata->getTableSchema()); } @@ -86,24 +84,25 @@ public: if (updated) Storage::object_storage = new_object_storage; - auto new_metadata = parseIcebergMetadata(Storage::object_storage, base_configuration, local_context); + auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) + if (!current_metadata || !(*current_metadata == *new_metadata)) current_metadata = std::move(new_metadata); - else if (updated) - { - auto updated_configuration = base_configuration->clone(); - /// If metadata wasn't changed, we won't list data files again. - updated_configuration->getPaths() = current_metadata->getDataFiles(); - Storage::configuration = updated_configuration; - } + else if (!updated) + return {Storage::configuration, Storage::object_storage}; + + auto updated_configuration = base_configuration->clone(); + /// If metadata wasn't changed, we won't list data files again. + updated_configuration->getPaths() = current_metadata->getDataFiles(); + Storage::configuration = updated_configuration; + return {Storage::configuration, Storage::object_storage}; } template - StorageIceberg( + IStorageDataLake( ConfigurationPtr base_configuration_, - std::unique_ptr metadata_, + DataLakeMetadataPtr metadata_, Args &&... args) : Storage(std::forward(args)...) 
, base_configuration(base_configuration_) @@ -113,8 +112,13 @@ public: private: ConfigurationPtr base_configuration; - std::unique_ptr current_metadata; + DataLakeMetadataPtr current_metadata; }; + +using StorageIceberg = IStorageDataLake; +using StorageDeltaLake = IStorageDataLake; +using StorageHudi = IStorageDataLake; + } #endif diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp similarity index 96% rename from src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp rename to src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp index 5543e60e7a7..8ee6f002ca6 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp @@ -24,7 +24,8 @@ #include #include #include -#include +#include +#include #include #include @@ -332,25 +333,6 @@ MutableColumns parseAvro( return columns; } -std::vector listFiles( - const ObjectStoragePtr & object_storage, - const StorageObjectStorageConfiguration & configuration, - const String & prefix, const String & suffix) -{ - auto key = std::filesystem::path(configuration.getPath()) / prefix; - RelativePathsWithMetadata files_with_metadata; - object_storage->listObjects(key, files_with_metadata, 0); - Strings res; - for (const auto & file_with_metadata : files_with_metadata) - { - const auto & filename = file_with_metadata->relative_path; - if (filename.ends_with(suffix)) - res.push_back(filename); - } - LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - return res; -} - /** * Each version of table metadata is stored in a `metadata` directory and * has one of 2 formats: @@ -361,7 +343,7 @@ std::pair getMetadataFileAndVersion( ObjectStoragePtr object_storage, const StorageObjectStorageConfiguration & configuration) { - const auto metadata_files = listFiles(object_storage, configuration, "metadata", ".metadata.json"); + const auto metadata_files = listFiles(*object_storage, configuration, "metadata", ".metadata.json"); if (metadata_files.empty()) { throw Exception( @@ -394,14 +376,14 @@ std::pair getMetadataFileAndVersion( } -std::unique_ptr parseIcebergMetadata( +DataLakeMetadataPtr IcebergMetadata::create( ObjectStoragePtr object_storage, StorageObjectStorageConfigurationPtr configuration, - ContextPtr context_) + ContextPtr local_context) { const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration); LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path); - auto read_settings = context_->getReadSettings(); + auto read_settings = local_context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); String json_str; readJSONObjectPossiblyInvalid(json_str, *buf); @@ -411,7 +393,7 @@ std::unique_ptr parseIcebergMetadata( Poco::JSON::Object::Ptr object = json.extract(); auto format_version = object->getValue("format-version"); - auto [schema, schema_id] = parseTableSchema(object, format_version, context_->getSettingsRef().iceberg_engine_ignore_schema_evolution); + auto [schema, schema_id] = parseTableSchema(object, format_version, local_context->getSettingsRef().iceberg_engine_ignore_schema_evolution); auto current_snapshot_id = object->getValue("current-snapshot-id"); auto snapshots = object->get("snapshots").extract(); @@ -428,7 +410,7 @@ std::unique_ptr parseIcebergMetadata( } } - return std::make_unique(object_storage, configuration, context_, metadata_version, 
format_version, manifest_list_file, schema_id, schema); + return std::make_unique(object_storage, configuration, local_context, metadata_version, format_version, manifest_list_file, schema_id, schema); } /** @@ -456,7 +438,7 @@ std::unique_ptr parseIcebergMetadata( * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=2/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00003.parquet','PARQUET',(2),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0'),(2,'3')],[(1,'\0\0\0\0\0\0\0'),(2,'3')],NULL,[4],0) │ * └────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ */ -Strings IcebergMetadata::getDataFiles() +Strings IcebergMetadata::getDataFiles() const { if (!data_files.empty()) return data_files; diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h similarity index 76% rename from src/Storages/DataLakes/Iceberg/IcebergMetadata.h rename to src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index a289715848f..f88e3eecc67 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include namespace DB { @@ -57,12 +58,16 @@ namespace DB * "metadata-log" : [ ] * } */ -class IcebergMetadata : WithContext +class IcebergMetadata : public IDataLakeMetadata, private WithContext { public: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + + static constexpr auto name = "Iceberg"; + IcebergMetadata( ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + ConfigurationPtr configuration_, ContextPtr context_, Int32 metadata_version_, Int32 format_version_, @@ -72,31 +77,36 @@ public: /// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files. /// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file) - Strings getDataFiles(); + Strings getDataFiles() const override; /// Get table schema parsed from metadata. 
-    NamesAndTypesList getTableSchema() const { return schema; }
+    NamesAndTypesList getTableSchema() const override { return schema; }

-    size_t getVersion() const { return metadata_version; }
+    bool operator ==(const IDataLakeMetadata & other) const override
+    {
+        const auto * iceberg_metadata = dynamic_cast(&other);
+        return iceberg_metadata && getVersion() == iceberg_metadata->getVersion();
+    }
+
+    static DataLakeMetadataPtr create(
+        ObjectStoragePtr object_storage,
+        ConfigurationPtr configuration,
+        ContextPtr local_context);

 private:
-    ObjectStoragePtr object_storage;
-    StorageObjectStorageConfigurationPtr configuration;
+    size_t getVersion() const { return metadata_version; }
+
+    const ObjectStoragePtr object_storage;
+    const ConfigurationPtr configuration;
     Int32 metadata_version;
     Int32 format_version;
     String manifest_list_file;
     Int32 current_schema_id;
     NamesAndTypesList schema;
-    Strings data_files;
+    mutable Strings data_files;
     LoggerPtr log;
-
 };

-std::unique_ptr parseIcebergMetadata(
-    ObjectStoragePtr object_storage,
-    StorageObjectStorageConfigurationPtr configuration,
-    ContextPtr context);
-
 }

 #endif
diff --git a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp
new file mode 100644
index 00000000000..d93c14dfe32
--- /dev/null
+++ b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp
@@ -0,0 +1,83 @@
+#include "config.h"
+
+#if USE_AWS_S3
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+#if USE_AVRO /// StorageIceberg depends on Avro to parse metadata in Avro format.
+
+void registerStorageIceberg(StorageFactory & factory)
+{
+    factory.registerStorage(
+        "Iceberg",
+        [&](const StorageFactory::Arguments & args)
+        {
+            auto configuration = std::make_shared();
+            StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
+
+            return StorageIceberg::create(
+                configuration, args.getContext(), "Iceberg", args.table_id, args.columns,
+                args.constraints, args.comment, std::nullopt, args.attach);
+        },
+        {
+            .supports_settings = false,
+            .supports_schema_inference = true,
+            .source_access_type = AccessType::S3,
+        });
+}
+
+#endif
+
+#if USE_PARQUET
+void registerStorageDeltaLake(StorageFactory & factory)
+{
+    factory.registerStorage(
+        "DeltaLake",
+        [&](const StorageFactory::Arguments & args)
+        {
+            auto configuration = std::make_shared();
+            StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
+
+            return StorageDeltaLake::create(
+                configuration, args.getContext(), "DeltaLake", args.table_id, args.columns,
+                args.constraints, args.comment, std::nullopt, args.attach);
+        },
+        {
+            .supports_settings = false,
+            .supports_schema_inference = true,
+            .source_access_type = AccessType::S3,
+        });
+}
+#endif
+
+void registerStorageHudi(StorageFactory & factory)
+{
+    factory.registerStorage(
+        "Hudi",
+        [&](const StorageFactory::Arguments & args)
+        {
+            auto configuration = std::make_shared();
+            StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
+
+            return StorageHudi::create(
+                configuration, args.getContext(), "Hudi", args.table_id, args.columns,
+                args.constraints, args.comment, std::nullopt, args.attach);
+        },
+        {
+            .supports_settings = false,
+            .supports_schema_inference = true,
+            .source_access_type = AccessType::S3,
+        });
+}
+
+}
+
+#endif
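These registrations set .supports_schema_inference = true, so a CREATE query may omit the column list and let the engine infer the schema from table metadata. A minimal usage sketch for the registered engines; the bucket URL and credentials below are placeholders, not values taken from this patch:

CREATE TABLE iceberg_table
    ENGINE = Iceberg('http://bucket.s3.amazonaws.com/test_table/', 'access_key_id', 'secret_access_key');
SELECT * FROM iceberg_table;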
diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp
new file mode 100644
index 00000000000..c80237b3055
--- /dev/null
+++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp
@@ -0,0 +1,57 @@
+#include
+
+#if USE_HDFS
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other)
+{
+    url = other.url;
+    path = other.path;
+    paths = other.paths;
+    format = other.format;
+    compression_method = other.compression_method;
+    structure = other.structure;
+}
+
+void StorageHDFSConfiguration::check(ContextPtr context) const
+{
+    context->getRemoteHostFilter().checkURL(Poco::URI(url));
+    checkHDFSURL(url);
+}
+
+ObjectStoragePtr StorageHDFSConfiguration::createOrUpdateObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT
+{
+    UNUSED(is_readonly);
+    auto settings = std::make_unique();
+    return std::make_shared(url, std::move(settings), context->getConfigRef());
+}
+
+void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr, bool /* with_structure */)
+{
+    url = checkAndGetLiteralArgument(args[0], "url");
+
+    /// Store the parsed values in the base-class members (format, compression_method),
+    /// so that they actually take effect; plain locals here would be discarded.
+    format = "auto";
+    if (args.size() > 1)
+        format = checkAndGetLiteralArgument(args[1], "format_name");
+
+    if (format == "auto")
+        format = FormatFactory::instance().getFormatFromFileName(url, true);
+
+    if (args.size() == 3)
+        compression_method = checkAndGetLiteralArgument(args[2], "compression_method");
+    else
+        compression_method = "auto";
+}
+}
+
+#endif
diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h
new file mode 100644
index 00000000000..03fb0824123
--- /dev/null
+++ b/src/Storages/ObjectStorage/HDFS/Configuration.h
@@ -0,0 +1,45 @@
+#pragma once
+#include "config.h"
+
+#if USE_HDFS
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+class StorageHDFSConfiguration : public StorageObjectStorageConfiguration
+{
+public:
+    StorageHDFSConfiguration() = default;
+    StorageHDFSConfiguration(const StorageHDFSConfiguration & other);
+
+    Path getPath() const override { return path; }
+    void setPath(const Path & path_) override { path = path_; }
+
+    const Paths & getPaths() const override { return paths; }
+    Paths & getPaths() override { return paths; }
+
+    String getNamespace() const override { return ""; }
+    String getDataSourceDescription() override { return url; }
+
+    void check(ContextPtr context) const override;
+    ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT
+    StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); }
+
+    void fromNamedCollection(const NamedCollection &) override {}
+    void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override;
+
+    static void addStructureToArgs(ASTs &, const String &, ContextPtr) {}
+
+private:
+    String url;
+    String path;
+    std::vector paths;
+};
+
+}
+
+#endif
diff --git a/src/Storages/ObjectStorage/HDFSConfiguration.h b/src/Storages/ObjectStorage/HDFSConfiguration.h
deleted file mode 100644
index aa45c634042..00000000000
--- a/src/Storages/ObjectStorage/HDFSConfiguration.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#pragma once
-#include "config.h"
-
-#if USE_HDFS
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-}
-
-class
StorageHDFSConfiguration : public StorageObjectStorageConfiguration -{ -public: - Path getPath() const override { return path; } - void setPath(const Path & path_) override { path = path_; } - - const Paths & getPaths() const override { return paths; } - Paths & getPaths() override { return paths; } - - String getNamespace() const override { return ""; } - String getDataSourceDescription() override { return url; } - - void check(ContextPtr context) const override - { - context->getRemoteHostFilter().checkURL(Poco::URI(url)); - checkHDFSURL(url); - } - StorageObjectStorageConfigurationPtr clone() override - { - auto configuration = std::make_shared(); - return configuration; - } - - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override /// NOLINT - { - UNUSED(is_readonly); - auto settings = std::make_unique(); - return std::make_shared(url, std::move(settings), context->getConfigRef()); - } - - void fromNamedCollection(const NamedCollection &) override {} - void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override - { - url = checkAndGetLiteralArgument(args[0], "url"); - - String format_name = "auto"; - if (args.size() > 1) - format_name = checkAndGetLiteralArgument(args[1], "format_name"); - - if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url, true); - - String compression_method; - if (args.size() == 3) - { - compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); - } else compression_method = "auto"; - - } - static void addStructureToArgs(ASTs &, const String &, ContextPtr) {} - -private: - String url; - String path; - std::vector paths; -}; - -} - -#endif diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index dcdf36dbcf5..a3e19b907bc 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -67,11 +67,11 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( auto get_last_mod_time = [&] -> std::optional { if (object_info->metadata) - return object_info->metadata->last_modified->epochMicroseconds(); + return object_info->metadata->last_modified.epochMicroseconds(); else { object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata->last_modified->epochMicroseconds(); + return object_info->metadata->last_modified.epochMicroseconds(); } }; diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp index 2c27c816078..b33eea7d354 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -59,7 +59,6 @@ void ReadFromStorageObejctStorage::applyFilters() const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); - createIterator(predicate); } diff --git a/src/Storages/ObjectStorage/S3Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp similarity index 97% rename from src/Storages/ObjectStorage/S3Configuration.cpp rename to src/Storages/ObjectStorage/S3/Configuration.cpp index 5a5412019f5..f057745d669 100644 --- a/src/Storages/ObjectStorage/S3Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -1,4 +1,7 @@ -#include +#include + +#if USE_AWS_S3 + #include #include #include @@ -14,6 +17,7 @@ namespace DB namespace 
ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; } static const std::unordered_set required_configuration_keys = { @@ -51,17 +55,19 @@ void StorageS3Configuration::check(ContextPtr context) const context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); } -StorageObjectStorageConfigurationPtr StorageS3Configuration::clone() +StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) { - auto configuration = std::make_shared(); - configuration->url = url; - configuration->auth_settings = auth_settings; - configuration->request_settings = request_settings; - configuration->static_configuration = static_configuration; - configuration->headers_from_ast = headers_from_ast; - configuration->keys = keys; - configuration->initialized = initialized; - return configuration; + url = other.url; + auth_settings = other.auth_settings; + request_settings = other.request_settings; + static_configuration = other.static_configuration; + headers_from_ast = other.headers_from_ast; + keys = other.keys; + initialized = other.initialized; + + format = other.format; + compression_method = other.compression_method; + structure = other.structure; } ObjectStoragePtr StorageS3Configuration::createOrUpdateObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT @@ -489,3 +495,5 @@ void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & stru } } + +#endif diff --git a/src/Storages/ObjectStorage/S3Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h similarity index 81% rename from src/Storages/ObjectStorage/S3Configuration.h rename to src/Storages/ObjectStorage/S3/Configuration.h index c953bc25c4e..037cf2eae87 100644 --- a/src/Storages/ObjectStorage/S3Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -1,7 +1,12 @@ #pragma once + +#include "config.h" + +#if USE_AWS_S3 + #include #include -#include +#include namespace DB { @@ -9,6 +14,9 @@ namespace DB class StorageS3Configuration : public StorageObjectStorageConfiguration { public: + StorageS3Configuration() = default; + StorageS3Configuration(const StorageS3Configuration & other); + Path getPath() const override { return url.key; } void setPath(const Path & path) override { url.key = path; } @@ -19,9 +27,8 @@ public: String getDataSourceDescription() override; void check(ContextPtr context) const override; - StorageObjectStorageConfigurationPtr clone() override; - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; @@ -44,3 +51,5 @@ private: }; } + +#endif diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 9a7260ea47c..08d7c9d0014 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -24,8 +24,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; } @@ -59,7 +57,6 @@ std::unique_ptr getStorageMetadata( 
storage_metadata->setColumns(columns); } - storage_metadata->setConstraints(constraints); storage_metadata->setComment(comment); return storage_metadata; @@ -264,10 +261,7 @@ SinkToStoragePtr StorageObjectStorage::write( template void StorageObjectStorage::truncate( - const ASTPtr &, - const StorageMetadataPtr &, - ContextPtr, - TableExclusiveLockHolder &) + const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) { if (configuration->withGlobs()) { diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 0b29845ba5c..6f18153c7af 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -95,8 +95,7 @@ public: ContextPtr context); protected: - virtual std::pair - updateConfigurationAndGetCopy(ContextPtr local_context); + virtual std::pair updateConfigurationAndGetCopy(ContextPtr local_context); const std::string engine_name; const NamesAndTypesList virtual_columns; @@ -110,7 +109,7 @@ protected: }; using StorageS3 = StorageObjectStorage; -using StorageAzureBlobStorage = StorageObjectStorage; +using StorageAzureBlob = StorageObjectStorage; using StorageHDFS = StorageObjectStorage; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 39cd5d8eca6..c03bbd1a45d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index aae8f704a73..507de20e888 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -67,5 +67,8 @@ using StorageS3Cluster = StorageObjectStorageCluster; #endif +#if USE_HDFS +using StorageHDFSCluster = StorageObjectStorageCluster; +#endif } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 2d5760ed9d8..651f1d25ec1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB diff --git a/src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h similarity index 99% rename from src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h rename to src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 427d6a8d453..04b2d8e8fd9 100644 --- a/src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -39,9 +39,8 @@ public: std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; - virtual StorageObjectStorageConfigurationPtr clone() = 0; - virtual ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT + virtual StorageObjectStorageConfigurationPtr clone() = 0; String format = "auto"; String compression_method = "auto"; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp new file mode 100644 index 00000000000..37f93a2b82f --- /dev/null 
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp
@@ -0,0 +1,127 @@
+#include "StorageObjectStorageSink.h"
+#include
+#include
+
+namespace DB
+{
+
+StorageObjectStorageSink::StorageObjectStorageSink(
+    ObjectStoragePtr object_storage,
+    StorageObjectStorageConfigurationPtr configuration,
+    std::optional format_settings_,
+    const Block & sample_block_,
+    ContextPtr context,
+    const std::string & blob_path)
+    : SinkToStorage(sample_block_)
+    , sample_block(sample_block_)
+    , format_settings(format_settings_)
+{
+    const auto & settings = context->getSettingsRef();
+    const auto path = blob_path.empty() ? configuration->getPaths().back() : blob_path;
+    const auto chosen_compression_method = chooseCompressionMethod(path, configuration->compression_method);
+
+    auto buffer = object_storage->writeObject(
+        StoredObject(path), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings());
+
+    write_buf = wrapWriteBufferWithCompressionMethod(
+        std::move(buffer),
+        chosen_compression_method,
+        static_cast(settings.output_format_compression_level),
+        static_cast(settings.output_format_compression_zstd_window_log));
+
+    writer = FormatFactory::instance().getOutputFormatParallelIfPossible(
+        configuration->format, *write_buf, sample_block, context, format_settings);
+}
+
+void StorageObjectStorageSink::consume(Chunk chunk)
+{
+    std::lock_guard lock(cancel_mutex);
+    if (cancelled)
+        return;
+    writer->write(getHeader().cloneWithColumns(chunk.detachColumns()));
+}
+
+void StorageObjectStorageSink::onCancel()
+{
+    std::lock_guard lock(cancel_mutex);
+    finalize();
+    cancelled = true;
+}
+
+void StorageObjectStorageSink::onException(std::exception_ptr exception)
+{
+    std::lock_guard lock(cancel_mutex);
+    try
+    {
+        std::rethrow_exception(exception);
+    }
+    catch (...)
+    {
+        /// An exception context is needed to properly delete write buffers without finalization.
+        release();
+    }
+}
+
+void StorageObjectStorageSink::onFinish()
+{
+    std::lock_guard lock(cancel_mutex);
+    finalize();
+}
+
+void StorageObjectStorageSink::finalize()
+{
+    if (!writer)
+        return;
+
+    try
+    {
+        writer->finalize();
+        writer->flush();
+        write_buf->finalize();
+    }
+    catch (...)
+    {
+        /// Stop ParallelFormattingOutputFormat correctly.
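+        /// release() resets the writer (stopping its background formatting work) and finalizes the buffer;
+        /// the original exception is rethrown below.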
+ release(); + throw; + } +} + +void StorageObjectStorageSink::release() +{ + writer.reset(); + write_buf->finalize(); +} + +PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( + ObjectStoragePtr object_storage_, + StorageObjectStorageConfigurationPtr configuration_, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ASTPtr & partition_by) + : PartitionedSink(partition_by, context_, sample_block_) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , sample_block(sample_block_) + , context(context_) +{ +} + +SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String & partition_id) +{ + auto blob = configuration->getPaths().back(); + auto partition_key = replaceWildcards(blob, partition_id); + validatePartitionKey(partition_key, true); + return std::make_shared( + object_storage, + configuration, + format_settings, + sample_block, + context, + partition_key + ); +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index a2d42d7fa9f..14298376d0e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,9 +1,8 @@ #pragma once #include -#include -#include +#include #include -#include +#include namespace DB { @@ -16,64 +15,17 @@ public: std::optional format_settings_, const Block & sample_block_, ContextPtr context, - const std::string & blob_path = "") - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - const auto & settings = context->getSettingsRef(); - const auto path = blob_path.empty() ? configuration->getPaths().back() : blob_path; - const auto chosen_compression_method = chooseCompressionMethod(path, configuration->compression_method); - - auto buffer = object_storage->writeObject( - StoredObject(path), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings()); - - write_buf = wrapWriteBufferWithCompressionMethod( - std::move(buffer), - chosen_compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - - writer = FormatFactory::instance().getOutputFormatParallelIfPossible( - configuration->format, *write_buf, sample_block, context, format_settings); - } + const std::string & blob_path = ""); String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } + void consume(Chunk chunk) override; - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } + void onCancel() override; - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization. 
- release(); - } - } + void onException(std::exception_ptr exception) override; - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } + void onFinish() override; private: const Block sample_block; @@ -84,30 +36,8 @@ private: bool cancelled = false; std::mutex cancel_mutex; - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. - release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } + void finalize(); + void release(); }; class PartitionedStorageObjectStorageSink : public PartitionedSink @@ -119,30 +49,9 @@ public: std::optional format_settings_, const Block & sample_block_, ContextPtr context_, - const ASTPtr & partition_by) - : PartitionedSink(partition_by, context_, sample_block_) - , object_storage(object_storage_) - , configuration(configuration_) - , format_settings(format_settings_) - , sample_block(sample_block_) - , context(context_) - { - } + const ASTPtr & partition_by); - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto blob = configuration->getPaths().back(); - auto partition_key = replaceWildcards(blob, partition_id); - validatePartitionKey(partition_key, true); - return std::make_shared( - object_storage, - configuration, - format_settings, - sample_block, - context, - partition_key - ); - } + SinkPtr createSinkForPartition(const String & partition_id) override; private: ObjectStoragePtr object_storage; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index f170a46112f..1fda75897f9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -26,6 +26,8 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_COMPILE_REGEXP; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } StorageObjectStorageSource::StorageObjectStorageSource( @@ -182,8 +184,8 @@ std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const O auto get_last_mod_time = [&]() -> std::optional { - return object_info->metadata && object_info->metadata->last_modified - ? object_info->metadata->last_modified->epochMicroseconds() + return object_info->metadata + ? object_info->metadata->last_modified.epochMicroseconds() : 0; }; return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); @@ -472,4 +474,29 @@ ObjectInfoPtr StorageObjectStorageSource::KeysIterator::next(size_t /* processor return std::make_shared(key, metadata); } +StorageObjectStorageSource::ReaderHolder::ReaderHolder( + ObjectInfoPtr object_info_, + std::unique_ptr read_buf_, + std::shared_ptr source_, + std::unique_ptr pipeline_, + std::unique_ptr reader_) + : object_info(std::move(object_info_)) + , read_buf(std::move(read_buf_)) + , source(std::move(source_)) + , pipeline(std::move(pipeline_)) + , reader(std::move(reader_)) +{ +} + +StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept +{ + /// The order of destruction is important. + /// reader uses pipeline, pipeline uses read_buf. 
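+        /// Assigning in this order destroys the old reader (which uses the old pipeline) before the old
+        /// pipeline, and the old pipeline (which uses the old read_buf) before the old read_buf, so nothing
+        /// dangles mid-move.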
+ reader = std::move(other.reader); + pipeline = std::move(other.pipeline); + source = std::move(other.source); + read_buf = std::move(other.read_buf); + object_info = std::move(other.object_info); + return *this; +} } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 0d6a6b71271..214a7de14d6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -75,32 +75,16 @@ protected: std::unique_ptr read_buf_, std::shared_ptr source_, std::unique_ptr pipeline_, - std::unique_ptr reader_) - : object_info(std::move(object_info_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) {} + std::unique_ptr reader_); ReaderHolder() = default; ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); } + ReaderHolder & operator=(ReaderHolder && other) noexcept; explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. - /// reader uses pipeline, pipeline uses read_buf. - reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - object_info = std::move(other.object_info); - return *this; - } - const String & getRelativePath() const { return object_info->relative_path; } const ObjectInfo & getObjectInfo() const { return *object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } @@ -143,7 +127,7 @@ public: size_t estimatedKeysCount() override { return 0; } /// TODO FIXME - ObjectInfoPtr next(size_t) override { return std::make_shared( callback(), ObjectMetadata{} ); } + ObjectInfoPtr next(size_t) override { return std::make_shared(callback(), ObjectMetadata{}); } private: ReadTaskCallback callback; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h index 51be7419e1c..241e2f20962 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h @@ -1,4 +1,5 @@ -#include +#pragma once +#include namespace DB { diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index f7ab37490e1..e23457c04e9 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include #include #include @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + template static std::shared_ptr> createStorageObjectStorage( const StorageFactory::Arguments & args, @@ -149,6 +154,7 @@ void registerStorageObjectStorage(StorageFactory & factory) #if USE_HDFS registerStorageHDFS(factory); #endif + UNUSED(factory); } } diff --git a/src/Storages/ObjectStorageConfiguration.h b/src/Storages/ObjectStorageConfiguration.h deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 
70dd8f27d71..9502a3c5e70 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,7 +3,7 @@ #if USE_AWS_S3 #include -#include +#include #include namespace DB diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index fc4ef77ebb9..b03224cedff 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -36,6 +36,13 @@ namespace ProfileEvents extern const Event S3ListObjects; } +namespace CurrentMetrics +{ + extern const Metric ObjectStorageS3Threads; + extern const Metric ObjectStorageS3ThreadsActive; + extern const Metric ObjectStorageS3ThreadsScheduled; +} + namespace DB { diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 77d5be3698c..a53ce440c3f 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -81,7 +81,7 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageAzureBlobStorage::getSchemaCache(context), "Azure"); /// FIXME + fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); /// FIXME #endif } diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 0ffa1460d78..8edba4e6e4b 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -1,18 +1,17 @@ #pragma once #include "config.h" - -#if USE_AWS_S3 - -# include -# include -# include -# include -# include -# include -#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include + namespace DB { @@ -26,18 +25,20 @@ public: protected: StoragePtr executeImpl( - const ASTPtr & /*ast_function*/, + const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/, + ColumnsDescription cached_columns, bool /*is_insert_query*/) const override { ColumnsDescription columns; - if (TableFunction::configuration->structure != "auto") - columns = parseColumnsListFromString(TableFunction::configuration->structure, context); + auto configuration = TableFunction::getConfiguration(); + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!cached_columns.empty()) + columns = cached_columns; - StorageObjectStorageConfigurationPtr configuration = TableFunction::configuration; - StoragePtr storage = StorageIceberg>::create( + StoragePtr storage = Storage::create( configuration, context, "", StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt, false); @@ -45,26 +46,53 @@ protected: return storage; } - const char * getStorageTypeName() const override { return Storage::name; } + const char * getStorageTypeName() const override { return name; } - ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override { - if (TableFunction::configuration->structure == "auto") + auto configuration = 
TableFunction::getConfiguration(); + if (configuration->structure == "auto") { context->checkAccess(TableFunction::getSourceAccessType()); - return Storage::getTableStructureFromData(TableFunction::object_storage, TableFunction::configuration, std::nullopt, context); + auto object_storage = TableFunction::getObjectStorage(context, !is_insert_query); + return Storage::getTableStructureFromData(object_storage, configuration, std::nullopt, context); } - return parseColumnsListFromString(TableFunction::configuration->structure, context); + return parseColumnsListFromString(configuration->structure, context); } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override { + auto configuration = TableFunction::getConfiguration(); + configuration->format = "Parquet"; /// Set default format to Parquet if it's not specified in arguments. - TableFunction::configuration->format = "Parquet"; TableFunction::parseArguments(ast_function, context); } }; -} +struct TableFunctionIcebergName +{ + static constexpr auto name = "iceberg"; +}; + +struct TableFunctionDeltaLakeName +{ + static constexpr auto name = "deltaLake"; +}; + +struct TableFunctionHudiName +{ + static constexpr auto name = "hudi"; +}; + +#if USE_AWS_S3 +#if USE_AVRO +using TableFunctionIceberg = ITableFunctionDataLake; #endif +#if USE_PARQUET +using TableFunctionDeltaLake = ITableFunctionDataLake; +#endif +using TableFunctionHudi = ITableFunctionDataLake; +#endif + +} diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp deleted file mode 100644 index 08b62ed2612..00000000000 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_PARQUET - -#include -#include -#include -#include -#include "registerTableFunctions.h" - -namespace DB -{ - -struct TableFunctionDeltaLakeName -{ - static constexpr auto name = "deltaLake"; -}; - -// using TableFunctionDeltaLake = ITableFunctionDataLake; -// -// void registerTableFunctionDeltaLake(TableFunctionFactory & factory) -// { -// factory.registerFunction( -// {.documentation = { -// .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", -// .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, -// .categories{"DataLake"}}, -// .allow_readonly = false}); -// } - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp deleted file mode 100644 index c6d84504c40..00000000000 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include "registerTableFunctions.h" - -namespace DB -{ - -struct TableFunctionHudiName -{ - static constexpr auto name = "hudi"; -}; -// using TableFunctionHudi = ITableFunctionDataLake; -// -// void registerTableFunctionHudi(TableFunctionFactory & factory) -// { -// factory.registerFunction( -// {.documentation -// = {.description=R"(The table function can be used to read the Hudi table stored on object store.)", -// .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, -// .categories{"DataLake"}}, -// .allow_readonly = false}); -// } -} - -#endif diff --git a/src/TableFunctions/TableFunctionIceberg.cpp b/src/TableFunctions/TableFunctionIceberg.cpp deleted file mode 100644 index 1a28f9292d1..00000000000 --- a/src/TableFunctions/TableFunctionIceberg.cpp +++ /dev/null 
@@ -1,37 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_AVRO - -#include -#include -#include -#include -#include "registerTableFunctions.h" - - -namespace DB -{ - -struct TableFunctionIcebergName -{ - static constexpr auto name = "iceberg"; -}; - -using TableFunctionIceberg = ITableFunctionDataLake< - TableFunctionIcebergName, - StorageIceberg, - TableFunctionS3>; - -void registerTableFunctionIceberg(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the Iceberg table stored on object store.)", - .examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index de46c13af37..a948102ac2b 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -7,10 +7,10 @@ #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include @@ -24,7 +24,6 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; } template @@ -36,6 +35,15 @@ ObjectStoragePtr TableFunctionObjectStorage< return object_storage; } +template +StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::getConfiguration() const +{ + if (!configuration) + configuration = std::make_shared(); + return configuration; +} + template std::vector TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const @@ -65,8 +73,7 @@ template void TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { - configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, engine_args, local_context, true); + StorageObjectStorageConfiguration::initialize(*getConfiguration(), engine_args, local_context, true); } template @@ -147,6 +154,7 @@ StoragePtr TableFunctionObjectStorage>( { diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index 1df0ba2f843..5e180301862 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -2,10 +2,9 @@ #include "config.h" -#if USE_AZURE_BLOB_STORAGE - #include #include +#include #include @@ -114,6 +113,8 @@ public: static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); protected: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, @@ -125,9 +126,11 @@ protected: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ObjectStoragePtr getObjectStorage(const ContextPtr & context, bool create_readonly) const; - mutable typename StorageObjectStorage::ConfigurationPtr configuration; + ObjectStoragePtr getObjectStorage(const ContextPtr & context, bool create_readonly) const; + ConfigurationPtr getConfiguration() const; + + mutable ConfigurationPtr 
configuration; mutable ObjectStoragePtr object_storage; ColumnsDescription structure_hint; @@ -146,5 +149,3 @@ using TableFunctionAzureBlob = TableFunctionObjectStorage; #endif } - -#endif diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 8e6c96a3f2a..c93d816dc07 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -6,9 +6,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include namespace DB @@ -103,6 +103,8 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) } ); #endif + + UNUSED(factory); } #if USE_AWS_S3 diff --git a/src/TableFunctions/registerDataLakeTableFunctions.cpp b/src/TableFunctions/registerDataLakeTableFunctions.cpp new file mode 100644 index 00000000000..15a6668f434 --- /dev/null +++ b/src/TableFunctions/registerDataLakeTableFunctions.cpp @@ -0,0 +1,69 @@ +#include +#include + +namespace DB +{ + +#if USE_AWS_S3 +#if USE_AVRO +void registerTableFunctionIceberg(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the Iceberg table stored on object store.)", + .examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +#if USE_PARQUET +void registerTableFunctionDeltaLake(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", + .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +void registerTableFunctionHudi(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the Hudi table stored on object store.)", + .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +void registerDataLakeTableFunctions(TableFunctionFactory & factory) +{ + UNUSED(factory); +#if USE_AWS_S3 +#if USE_AVRO + registerTableFunctionIceberg(factory); +#endif +#if USE_PARQUET + registerTableFunctionDeltaLake(factory); +#endif + registerTableFunctionHudi(factory); +#endif +} + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 627d945fbf3..05fe147e076 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -68,8 +68,7 @@ void registerTableFunctions() registerTableFunctionObjectStorage(factory); registerTableFunctionObjectStorageCluster(factory); - - + registerDataLakeTableFunctions(factory); } } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index cefb198273e..7998a4b49d9 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -30,13 +30,6 @@ void registerTableFunctionS3Cluster(TableFunctionFactory & factory); void registerTableFunctionCOS(TableFunctionFactory & factory); void registerTableFunctionOSS(TableFunctionFactory & factory); void registerTableFunctionGCS(TableFunctionFactory & factory); -void 
registerTableFunctionHudi(TableFunctionFactory & factory); -#if USE_PARQUET -void registerTableFunctionDeltaLake(TableFunctionFactory & factory); -#endif -#if USE_AVRO -void registerTableFunctionIceberg(TableFunctionFactory & factory); -#endif #endif #if USE_HIVE @@ -67,10 +60,9 @@ void registerTableFunctionFormat(TableFunctionFactory & factory); void registerTableFunctionExplain(TableFunctionFactory & factory); -#if USE_AZURE_BLOB_STORAGE void registerTableFunctionObjectStorage(TableFunctionFactory & factory); void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory); -#endif +void registerDataLakeTableFunctions(TableFunctionFactory & factory); void registerTableFunctions(); From 7577257df558fb3bd74e862e7da7b0f1b485ffeb Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Feb 2024 17:29:03 +0100 Subject: [PATCH 019/651] Fix cluster functions --- .../ReadFromStorageObjectStorage.cpp | 6 +- .../ObjectStorage/StorageObjectStorage.cpp | 3 +- .../StorageObjectStorageCluster.cpp | 9 ++- .../StorageObjectStorageSource.cpp | 63 ++++++++++++++++--- .../StorageObjectStorageSource.h | 25 ++++++-- src/Storages/S3Queue/StorageS3Queue.cpp | 5 +- .../TableFunctionObjectStorage.cpp | 3 + .../TableFunctionObjectStorageCluster.cpp | 27 ++++---- 8 files changed, 110 insertions(+), 31 deletions(-) diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp index b33eea7d354..9c58fcdaa9a 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -49,7 +49,8 @@ void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predi auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( configuration, object_storage, distributed_processing, context, predicate, - virtual_columns, nullptr, query_settings.list_object_keys_size, context->getFileProgressCallback()); + virtual_columns, nullptr, query_settings.list_object_keys_size, metric_threads_count, + metric_threads_active, metric_threads_scheduled, context->getFileProgressCallback()); } } @@ -75,7 +76,8 @@ void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pip auto source = std::make_shared( getName(), object_storage, configuration, info, format_settings, query_settings, - context, max_block_size, iterator_wrapper, need_only_count, schema_cache, std::move(threadpool)); + context, max_block_size, iterator_wrapper, need_only_count, schema_cache, + std::move(threadpool), metric_threads_count, metric_threads_active, metric_threads_scheduled); pipes.emplace_back(std::move(source)); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 08d7c9d0014..2e834da5529 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -289,7 +289,8 @@ ColumnsDescription StorageObjectStorage::getTableStructureFromD const auto settings = StorageSettings::create(context->getSettingsRef()); auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, object_storage, /* distributed_processing */false, - context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size); + context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size, + StorageSettings::ObjectStorageThreads(), 
StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); ReadBufferIterator read_buffer_iterator( object_storage, configuration, file_iterator, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index c03bbd1a45d..f0d9ea400c4 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -88,7 +88,14 @@ StorageObjectStorageCluster::getTask auto iterator = std::make_shared( object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size); - auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next(0)->relative_path; }); + auto callback = std::make_shared>([iterator]() mutable -> String + { + auto object_info = iterator->next(0); + if (object_info) + return object_info->relative_path; + else + return ""; + }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 1fda75897f9..a8bde4cd56f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -42,7 +42,10 @@ StorageObjectStorageSource::StorageObjectStorageSource( std::shared_ptr file_iterator_, bool need_only_count_, SchemaCache & schema_cache_, - std::shared_ptr reader_pool_) + std::shared_ptr reader_pool_, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ -57,6 +60,9 @@ StorageObjectStorageSource::StorageObjectStorageSource( , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(schema_cache_) + , metric_threads(metric_threads_) + , metric_threads_active(metric_threads_active_) + , metric_threads_scheduled(metric_threads_scheduled_) , create_reader_scheduler(threadPoolCallbackRunner(*create_reader_pool, "Reader")) { } @@ -75,10 +81,16 @@ std::shared_ptr StorageObjectStorageSourc const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, size_t list_object_keys_size, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_, std::function file_progress_callback) { if (distributed_processing) - return std::make_shared(local_context->getReadTaskCallback()); + return std::make_shared( + local_context->getReadTaskCallback(), + local_context->getSettingsRef().max_threads, + metric_threads_, metric_threads_active_, metric_threads_scheduled_); if (configuration->isNamespaceWithGlobs()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); @@ -380,19 +392,16 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor while (new_batch.empty()) { auto result = object_storage_iterator->getCurrentBatchAndScheduleNext(); - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {}", result.has_value()); - if (result.has_value()) - { - new_batch = std::move(result.value()); - } - else + if (!result.has_value()) { is_finished = true; return {}; } + new_batch = std::move(result.value()); for (auto it = new_batch.begin(); it != new_batch.end();) { + 
chassert(*it); if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) it = new_batch.erase(it); else @@ -406,8 +415,11 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor { std::vector paths; paths.reserve(new_batch.size()); - for (auto & object_info : new_batch) + for (const auto & object_info : new_batch) + { + chassert(object_info); paths.push_back(fs::path(configuration->getNamespace()) / object_info->relative_path); + } VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); } @@ -416,6 +428,7 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end()); object_infos = std::move(new_batch); + if (file_progress_callback) { for (const auto & object_info : object_infos) @@ -499,4 +512,36 @@ StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHol object_info = std::move(other.object_info); return *this; } + +StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( + const ReadTaskCallback & callback_, + size_t max_threads_count, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_) + : callback(callback_) +{ + ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); + auto pool_scheduler = threadPoolCallbackRunner(pool, "ReadTaskIter"); + + std::vector> keys; + keys.reserve(max_threads_count); + for (size_t i = 0; i < max_threads_count; ++i) + keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); + + pool.wait(); + buffer.reserve(max_threads_count); + for (auto & key_future : keys) + buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); +} + +ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::next(size_t) +{ + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= buffer.size()) + return std::make_shared(callback()); + + return buffer[current_index]; +} + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 214a7de14d6..14e59312c8c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -30,7 +30,10 @@ public: std::shared_ptr file_iterator_, bool need_only_count_, SchemaCache & schema_cache_, - std::shared_ptr reader_pool_); + std::shared_ptr reader_pool_, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_); ~StorageObjectStorageSource() override; @@ -47,6 +50,9 @@ public: const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, size_t list_object_keys_size, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_, std::function file_progress_callback = {}); protected: @@ -64,6 +70,10 @@ protected: SchemaCache & schema_cache; bool initialized = false; + const CurrentMetrics::Metric metric_threads; + const CurrentMetrics::Metric metric_threads_active; + const CurrentMetrics::Metric metric_threads_scheduled; + size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); @@ -123,14 +133,21 @@ public: class StorageObjectStorageSource::ReadTaskIterator : public IIterator { public: - explicit 
ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {} + ReadTaskIterator( + const ReadTaskCallback & callback_, + size_t max_threads_count, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_); - size_t estimatedKeysCount() override { return 0; } /// TODO FIXME + size_t estimatedKeysCount() override { return buffer.size(); } - ObjectInfoPtr next(size_t) override { return std::make_shared(callback(), ObjectMetadata{}); } + ObjectInfoPtr next(size_t) override; private: ReadTaskCallback callback; + ObjectInfos buffer; + std::atomic_size_t index = 0; }; class StorageObjectStorageSource::GlobIterator : public IIterator, WithContext diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index b03224cedff..b256f030da1 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -364,7 +364,10 @@ std::shared_ptr StorageS3Queue::createSource( file_iterator, false, Storage::getSchemaCache(local_context), - threadpool); + threadpool, + CurrentMetrics::ObjectStorageS3Threads, + CurrentMetrics::ObjectStorageS3ThreadsActive, + CurrentMetrics::ObjectStorageS3ThreadsScheduled); auto file_deleter = [=, this](const std::string & path) mutable { diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index a948102ac2b..a48c95469d0 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -93,6 +93,7 @@ template ColumnsDescription TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const { + chassert(configuration); if (configuration->structure == "auto") { context->checkAccess(getSourceAccessType()); @@ -107,6 +108,7 @@ template bool TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) { + chassert(configuration); return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); } @@ -127,6 +129,7 @@ StoragePtr TableFunctionObjectStoragestructure != "auto") columns = parseColumnsListFromString(configuration->structure, context); else if (!structure_hint.empty()) diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index c93d816dc07..5a29a693431 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -21,25 +21,23 @@ StoragePtr TableFunctionObjectStorageCluster; - StoragePtr storage; + auto configuration = Base::getConfiguration(); + bool structure_argument_was_provided = configuration->structure != "auto"; + ColumnsDescription columns; - bool structure_argument_was_provided = Base::configuration->structure != "auto"; - if (structure_argument_was_provided) - { - columns = parseColumnsListFromString(Base::configuration->structure, context); - } + columns = parseColumnsListFromString(configuration->structure, context); else if (!Base::structure_hint.empty()) - { columns = Base::structure_hint; - } + auto object_storage = Base::getObjectStorage(context, !is_insert_query); + StoragePtr storage; if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) { /// On worker node this filename won't contains globs storage = 
std::make_shared>( - Base::configuration, - Base::configuration->createOrUpdateObjectStorage(context, !is_insert_query), + configuration, + object_storage, Definition::storage_type_name, context, StorageID(Base::getDatabaseName(), table_name), @@ -54,8 +52,8 @@ StoragePtr TableFunctionObjectStorageCluster>( ITableFunctionCluster::cluster_name, - Base::configuration, - Base::configuration->createOrUpdateObjectStorage(context, !is_insert_query), + configuration, + object_storage, Definition::storage_type_name, StorageID(Base::getDatabaseName(), table_name), columns, @@ -87,7 +85,10 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) { .documentation = { .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", - .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .examples{{ + "azureBlobStorageCluster", + "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])", ""}}}, .allow_readonly = false } ); From ba0dc7bc54c8e621f63e3ba2f1bdbec15bdb9114 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 14 Feb 2024 10:32:29 +0100 Subject: [PATCH 020/651] fix failing style check and tests --- src/IO/ReadHelpers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index af66cbb4cb5..53a7229e7d5 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -539,7 +539,6 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) if (*buf.position() == '\r') ++buf.position(); - } } From bf12c376b0dde30092f0588a5439d7c7cab5e08b Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 16 Feb 2024 13:30:55 +0100 Subject: [PATCH 021/651] fix for fast tests failing on shell test --- tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 1e8dee22d28..88448171516 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh # Data preparation step -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH = $($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/data_without_crlf.tsv DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/data_with_crlf.tsv From 6e6bc97a3e0d8618dc80f5a26bb59f73623d1ccb Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 16 Feb 2024 13:42:58 +0100 Subject: [PATCH 022/651] fix failing style check --- tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 88448171516..cb7472be418 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # Data preparation step -USER_FILES_PATH = $($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/data_without_crlf.tsv DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/data_with_crlf.tsv From 0552f44f70d76f25f268259a09cbbb10dc3781d7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 19 Feb 2024 10:45:56 +0100 Subject: [PATCH 023/651] Fixes after merge with master, move some part of code to object storage --- src/Backups/BackupIO_S3.cpp | 8 +- src/Disks/ObjectStorages/IObjectStorage.h | 3 +- .../ObjectStorages/ObjectStorageFactory.cpp | 4 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 57 ++++- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 12 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 110 +++++---- src/Disks/ObjectStorages/S3/diskSettings.h | 13 +- src/IO/S3Common.cpp | 7 +- src/Storages/Cache/SchemaCache.cpp | 2 + .../ObjectStorage/AzureBlob/Configuration.cpp | 7 +- .../ObjectStorage/AzureBlob/Configuration.h | 2 +- .../DataLakes/IStorageDataLake.h | 18 +- .../ObjectStorage/HDFS/Configuration.cpp | 7 +- .../ObjectStorage/HDFS/Configuration.h | 2 +- .../ObjectStorage/ReadBufferIterator.cpp | 210 +++++++++++++----- .../ObjectStorage/ReadBufferIterator.h | 12 +- .../ObjectStorage/S3/Configuration.cpp | 108 ++------- src/Storages/ObjectStorage/S3/Configuration.h | 18 +- .../ObjectStorage/StorageObjectStorage.cpp | 109 ++++++--- .../ObjectStorage/StorageObjectStorage.h | 20 +- .../StorageObjectStorageCluster.cpp | 30 ++- .../StorageObjectStorageCluster.h | 8 +- .../StorageObjectStorageConfiguration.cpp | 6 +- .../StorageObjectStorageConfiguration.h | 3 +- .../StorageObjectStorageSource.h | 4 + .../registerStorageObjectStorage.cpp | 2 +- src/Storages/S3Queue/StorageS3Queue.cpp | 8 +- src/Storages/StorageS3Settings.cpp | 11 +- src/Storages/StorageS3Settings.h | 8 +- .../TableFunctionObjectStorage.cpp | 6 +- .../TableFunctionObjectStorage.h | 6 +- .../TableFunctionObjectStorageCluster.cpp | 7 +- 32 files 
changed, 498 insertions(+), 330 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index fa4c1af3698..6c7b3674fb7 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -127,10 +127,10 @@ BackupReaderS3::BackupReaderS3( : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).value_or(S3Settings{})) { auto & request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context_->getSettingsRef()); + request_settings.updateFromSettingsIfChanged(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint request_settings.allow_native_copy = allow_s3_native_copy; client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_); @@ -217,10 +217,10 @@ BackupWriterS3::BackupWriterS3( : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).value_or(S3Settings{})) { auto & request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context_->getSettingsRef()); + request_settings.updateFromSettingsIfChanged(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint request_settings.allow_native_copy = allow_s3_native_copy; request_settings.setStorageClassName(storage_class_name); diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 8a5352e71ca..5ff618e08eb 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -193,8 +193,7 @@ public: virtual void applyNewSettings( const Poco::Util::AbstractConfiguration &, const std::string & /*config_prefix*/, - ContextPtr) - {} + ContextPtr) {} /// Sometimes object storages have something similar to chroot or namespace, for example /// buckets in S3. If object storage doesn't have any namepaces return empty string. 
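A note on the value_or(S3Settings{}) calls above: getStorageS3Settings().getSettings() now returns an optional, since a backup URI may not match any per-endpoint settings section. The reader/writer then starts from default settings and overlays only the query-level changes via updateFromSettingsIfChanged(). A minimal standalone sketch of that lookup-then-overlay shape, assuming simplified stand-ins (the S3Settings fields, the integer-based updateFromSettingsIfChanged() and the example URLs are illustrative, not the real ClickHouse definitions):

    #include <iostream>
    #include <map>
    #include <optional>
    #include <string>

    /// Hypothetical, simplified stand-in for S3Settings.
    struct S3Settings
    {
        int max_single_read_retries = 4;
        bool allow_native_copy = true;

        /// Overlay only values the query explicitly changed (0 means "unchanged" here).
        void updateFromSettingsIfChanged(int retries_from_query)
        {
            if (retries_from_query != 0)
                max_single_read_retries = retries_from_query;
        }
    };

    /// Hypothetical stand-in for StorageS3Settings: per-endpoint overrides keyed by URI prefix.
    struct SettingsByEndpoint
    {
        std::map<std::string, S3Settings> by_endpoint_prefix;

        std::optional<S3Settings> getSettings(const std::string & uri) const
        {
            for (const auto & [prefix, settings] : by_endpoint_prefix)
                if (uri.rfind(prefix, 0) == 0) /// uri starts with prefix
                    return settings;
            return std::nullopt; /// no matching endpoint section
        }
    };

    int main()
    {
        SettingsByEndpoint all;
        S3Settings backup_endpoint;
        backup_endpoint.max_single_read_retries = 10;
        all.by_endpoint_prefix["https://backups.example/"] = backup_endpoint;

        /// Same shape as the backup reader above: a URI without a matching
        /// endpoint section falls back to defaults, then query-level changes
        /// are overlaid on top.
        S3Settings settings = all.getSettings("https://other.example/bucket").value_or(S3Settings{});
        settings.updateFromSettingsIfChanged(/*retries_from_query=*/7);
        std::cout << settings.max_single_read_retries << '\n'; /// prints 7
    }

The same defaults-then-overrides layering reappears in StorageS3Configuration::createObjectStorage() further down in this patch.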
diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index b3626135177..0855ba54d2f 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -126,7 +126,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings); + auto client = getClient(config, config_prefix, context, *settings, true); auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); auto object_storage = std::make_shared( @@ -162,7 +162,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings); + auto client = getClient(config, config_prefix, context, *settings, true); auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); auto object_storage = std::make_shared( diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a9bd520e6e9..7e856b45aea 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -242,7 +242,12 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (mode != WriteMode::Rewrite) throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); - auto settings_ptr = s3_settings.get(); + S3Settings::RequestSettings request_settings = s3_settings.get()->request_settings; + if (auto query_context = CurrentThread::getQueryContext()) + { + request_settings.updateFromSettingsIfChanged(query_context->getSettingsRef()); + } + ThreadPoolCallbackRunner scheduler; if (write_settings.s3_allow_parallel_part_upload) scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); @@ -256,7 +261,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN uri.bucket, object.remote_path, buf_size, - settings_ptr->request_settings, + request_settings, std::move(blob_storage_log), attributes, std::move(scheduler), @@ -534,19 +539,57 @@ void S3ObjectStorage::startup() const_cast(*client.get()).EnableRequestProcessing(); } -void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +void S3ObjectStorage::applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + if (!static_headers.empty()) + { + new_s3_settings->auth_settings.headers.insert( + new_s3_settings->auth_settings.headers.end(), + static_headers.begin(), static_headers.end()); + } + + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString())) + new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + + auto current_s3_settings = s3_settings.get(); + if (current_s3_settings->auth_settings.hasUpdates(new_s3_settings->auth_settings) || for_disk_s3) + { + auto new_client = 
getClient(config, config_prefix, context, *new_s3_settings, for_disk_s3, &uri); + client.set(std::move(new_client)); + } + s3_settings.set(std::move(new_s3_settings)); - client.set(std::move(new_client)); } +// void S3ObjectStorage::applyNewSettings(ContextPtr context) +// { +// auto settings = s3_settings.get(); +// if (!endpoint_settings || !settings->auth_settings.hasUpdates(endpoint_settings->auth_settings)) +// return; +// +// const auto & config = context->getConfigRef(); +// auto new_s3_settings = getSettings(uri, config, "s3.", context); +// +// new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); +// +// auto new_client = getClient(config, "s3.", context, *new_s3_settings, false); +// +// s3_settings.set(std::move(new_s3_settings)); +// client.set(std::move(new_client)); +// } + std::unique_ptr S3ObjectStorage::cloneObjectStorage( - const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true); String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); auto new_uri{uri}; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index a6843a383e5..187cdb58447 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -21,11 +21,13 @@ struct S3ObjectStorageSettings S3ObjectStorageSettings( const S3Settings::RequestSettings & request_settings_, + const S3::AuthSettings & auth_settings_, uint64_t min_bytes_for_seek_, int32_t list_object_keys_size_, int32_t objects_chunk_size_to_delete_, bool read_only_) : request_settings(request_settings_) + , auth_settings(auth_settings_) , min_bytes_for_seek(min_bytes_for_seek_) , list_object_keys_size(list_object_keys_size_) , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) @@ -33,6 +35,7 @@ struct S3ObjectStorageSettings {} S3Settings::RequestSettings request_settings; + S3::AuthSettings auth_settings; uint64_t min_bytes_for_seek; int32_t list_object_keys_size; @@ -52,7 +55,9 @@ private: S3::URI uri_, const S3Capabilities & s3_capabilities_, ObjectStorageKeysGeneratorPtr key_generator_, - const String & disk_name_) + const String & disk_name_, + bool for_disk_s3_ = true, + const HTTPHeaderEntries & static_headers_ = {}) : uri(uri_) , key_generator(std::move(key_generator_)) , disk_name(disk_name_) @@ -60,6 +65,8 @@ private: , s3_settings(std::move(s3_settings_)) , s3_capabilities(s3_capabilities_) , log(getLogger(logger_name)) + , for_disk_s3(for_disk_s3_) + , static_headers(static_headers_) { } @@ -180,6 +187,9 @@ private: S3Capabilities s3_capabilities; LoggerPtr log; + + const bool for_disk_s3; + const HTTPHeaderEntries static_headers; }; /// Do not encode keys, store as-is, and do not require separate disk for metadata. 
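The reworked applyNewSettings() above hinges on auth_settings.hasUpdates(): for a non-disk (StorageS3-backed) object storage the S3 client is rebuilt only when credential-related settings actually changed, while disk storage (for_disk_s3) keeps recreating it unconditionally. A rough standalone model of that refresh policy, where AuthSettings and the Client placeholder are simplified assumptions rather than the real S3::AuthSettings and Aws client:

    #include <iostream>
    #include <memory>
    #include <string>

    /// Hypothetical minimal stand-in for S3::AuthSettings.
    struct AuthSettings
    {
        std::string access_key_id;
        std::string secret_access_key;

        /// True if `other` would change anything; the real check also covers
        /// headers, region, session token, SSE configuration, etc.
        bool hasUpdates(const AuthSettings & other) const
        {
            return access_key_id != other.access_key_id
                || secret_access_key != other.secret_access_key;
        }
    };

    struct Client { AuthSettings auth; }; /// placeholder for the expensive S3 client

    class ObjectStorage
    {
    public:
        ObjectStorage(AuthSettings auth, bool for_disk)
            : settings(std::move(auth))
            , for_disk_s3(for_disk)
            , client(std::make_unique<Client>(Client{settings}))
        {}

        void applyNewSettings(const AuthSettings & incoming)
        {
            /// Recreate the client only when credentials changed, or always
            /// for disk-backed storage, mirroring the gate in the patch above.
            if (settings.hasUpdates(incoming) || for_disk_s3)
            {
                client = std::make_unique<Client>(Client{incoming});
                std::cout << "client recreated\n";
            }
            settings = incoming;
        }

    private:
        AuthSettings settings;
        bool for_disk_s3;
        std::unique_ptr<Client> client;
    };

    int main()
    {
        ObjectStorage storage(AuthSettings{"key", "secret"}, /*for_disk=*/false);
        storage.applyNewSettings(AuthSettings{"key", "secret"});   /// no-op: nothing changed
        storage.applyNewSettings(AuthSettings{"key2", "secret2"}); /// prints "client recreated"
    }

In the patch itself the rebuilt object is the client produced by getClient(), which is why for_disk_s3 and the URL are threaded down into diskSettings.cpp just below.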
diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp
index 4fd4b17aabe..cb2bb690292 100644
--- a/src/Disks/ObjectStorages/S3/diskSettings.cpp
+++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp
@@ -10,8 +10,6 @@
 #include
 #include
 #include
-#include "Disks/DiskFactory.h"
-
 #include
 #include
 #include
@@ -25,13 +23,19 @@
 namespace DB
 {
 
-std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
+std::unique_ptr getSettings(
+    const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
 {
     const Settings & settings = context->getSettingsRef();
     S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_");
+    /// TODO: add request settings prefix, because for StorageS3 it should be "s3."
+
+    S3::AuthSettings auth_settings;
+    auth_settings.loadFromConfig(config_prefix, config);
 
     return std::make_unique(
         request_settings,
+        auth_settings,
         config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
         config.getInt(config_prefix + ".list_object_keys_size", 1000),
         config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000),
@@ -42,78 +46,92 @@ std::unique_ptr getClient(
     const Poco::Util::AbstractConfiguration & config,
     const String & config_prefix,
     ContextPtr context,
-    const S3ObjectStorageSettings & settings)
+    const S3ObjectStorageSettings & settings,
+    bool for_disk_s3,
+    const S3::URI * url_)
 {
     const Settings & global_settings = context->getGlobalContext()->getSettingsRef();
     const Settings & local_settings = context->getSettingsRef();
 
-    String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
-    S3::URI uri(endpoint);
-    if (!uri.key.ends_with('/'))
-        uri.key.push_back('/');
+    const auto & auth_settings = settings.auth_settings;
+    const auto & request_settings = settings.request_settings;
+
+    S3::URI url;
+    if (for_disk_s3)
+    {
+        String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
+        url = S3::URI(endpoint);
+        if (!url.key.ends_with('/'))
+            url.key.push_back('/');
+    }
+    else
+    {
+        if (!url_)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed");
+        url = *url_;
+    }
 
     S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
-        config.getString(config_prefix + ".region", ""),
+        auth_settings.region,
         context->getRemoteHostFilter(),
         static_cast(global_settings.s3_max_redirects),
         static_cast(global_settings.s3_retry_attempts),
         global_settings.enable_s3_requests_logging,
-        /* for_disk_s3 = */ true,
+        for_disk_s3,
         settings.request_settings.get_request_throttler,
         settings.request_settings.put_request_throttler,
-        uri.uri.getScheme());
+        url.uri.getScheme());
+    client_configuration.endpointOverride = url.endpoint;
+    client_configuration.maxConnections = static_cast(request_settings.max_connections);
     client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS);
     client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS);
-    client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS);
-    client_configuration.endpointOverride = uri.endpoint;
-    client_configuration.http_keep_alive_timeout_ms = config.getUInt(
-        config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000);
- client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000); - client_configuration.wait_on_pool_size_limit = false; - client_configuration.s3_use_adaptive_timeouts = config.getBool( - config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); - /* - * Override proxy configuration for backwards compatibility with old configuration format. - * */ - auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat( - ProxyConfiguration::protocolFromString(uri.uri.getScheme()), - config_prefix, - config - ); - if (proxy_config) + client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000); + client_configuration.http_connection_pool_size = config.getUInt( + config_prefix + ".http_connection_pool_size", static_cast(global_settings.s3_http_connection_pool_size.value)); + client_configuration.s3_use_adaptive_timeouts = config.getBool(config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); + client_configuration.wait_on_pool_size_limit = for_disk_s3; + + if (for_disk_s3) { - client_configuration.per_request_configuration - = [proxy_config]() { return proxy_config->resolve(); }; - client_configuration.error_report - = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; + /* + * Override proxy configuration for backwards compatibility with old configuration format. + * */ + if (auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat( + ProxyConfiguration::protocolFromString(url.uri.getScheme()), config_prefix, config)) + { + client_configuration.per_request_configuration + = [proxy_config]() { return proxy_config->resolve(); }; + client_configuration.error_report + = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; + } } - HTTPHeaderEntries headers = S3::getHTTPHeaders(config_prefix, config); S3::ServerSideEncryptionKMSConfig sse_kms_config = S3::getSSEKMSConfig(config_prefix, config); - S3::ClientSettings client_settings{ - .use_virtual_addressing = uri.is_virtual_hosted_style, + .use_virtual_addressing = url.is_virtual_hosted_style, .disable_checksum = local_settings.s3_disable_checksum, .gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false), }; + auto credentials_configuration = S3::CredentialsConfiguration + { + auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), + auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), + auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), + auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), + }; + return S3::ClientFactory::instance().create( client_configuration, client_settings, - config.getString(config_prefix + ".access_key_id", ""), - config.getString(config_prefix + ".secret_access_key", ""), - config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""), + auth_settings.access_key_id, + auth_settings.secret_access_key, + auth_settings.server_side_encryption_customer_key_base64, std::move(sse_kms_config), - std::move(headers), - S3::CredentialsConfiguration - { - 
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", true)), - config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false)), - config.getUInt64(config_prefix + ".expiration_window_seconds", config.getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - config.getBool(config_prefix + ".no_sign_request", config.getBool("s3.no_sign_request", false)) - }); + auth_settings.headers, + credentials_configuration); } } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index 83bf7b179ef..194035365ea 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -22,9 +22,18 @@ namespace DB struct S3ObjectStorageSettings; -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); +std::unique_ptr getSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context); -std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, const S3ObjectStorageSettings & settings); +std::unique_ptr getClient( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context, + const S3ObjectStorageSettings & settings, + bool for_disk_s3, + const S3::URI * url_ = nullptr); } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 5039059f522..d33d5284240 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -157,8 +157,11 @@ void AuthSettings::updateFrom(const AuthSettings & from) if (!from.session_token.empty()) session_token = from.session_token; - headers = from.headers; - region = from.region; + if (!from.headers.empty()) + headers = from.headers; + if (!from.region.empty()) + region = from.region; + server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64; server_side_encryption_kms_config = from.server_side_encryption_kms_config; diff --git a/src/Storages/Cache/SchemaCache.cpp b/src/Storages/Cache/SchemaCache.cpp index 299dd292772..35fb8d348ef 100644 --- a/src/Storages/Cache/SchemaCache.cpp +++ b/src/Storages/Cache/SchemaCache.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace ProfileEvents @@ -109,6 +110,7 @@ std::optional SchemaCache::tryGetImpl(const Key & key, } ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheHits); + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {}", StackTrace().toString()); auto & schema_info = it->second.schema_info; auto & queue_iterator = it->second.iterator; diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 109918dfc8b..9d21541e7e2 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -102,7 +102,7 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co return settings_ptr; } -ObjectStoragePtr StorageAzureBlobConfiguration::createOrUpdateObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { auto client = createClient(is_readonly); auto settings = createSettings(context); @@ -245,8 +245,6 @@ void 
StorageAzureBlobConfiguration::fromNamedCollection(const NamedCollection & compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); blobs_paths = {blob_path}; - if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(blob_path, true); } void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) @@ -367,9 +365,6 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte } blobs_paths = {blob_path}; - - if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(blob_path, true); } void StorageAzureBlobConfiguration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h index deeb365d012..3d701e72cb4 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -31,7 +31,7 @@ public: String getNamespace() const override { return container; } void check(ContextPtr context) const override; - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection & collection) override; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 95196cdd000..8a21fc1152f 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -39,7 +39,7 @@ public: std::optional format_settings_, bool attach) { - auto object_storage = base_configuration->createOrUpdateObjectStorage(context); + auto object_storage = base_configuration->createObjectStorage(context); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; ConfigurationPtr configuration = base_configuration->clone(); @@ -75,28 +75,22 @@ public: return ColumnsDescription(metadata->getTableSchema()); } - std::pair updateConfigurationAndGetCopy(ContextPtr local_context) override + void updateConfiguration(ContextPtr local_context) override { std::lock_guard lock(Storage::configuration_update_mutex); - auto new_object_storage = base_configuration->createOrUpdateObjectStorage(local_context); - bool updated = new_object_storage != nullptr; - if (updated) - Storage::object_storage = new_object_storage; + Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - if (!current_metadata || !(*current_metadata == *new_metadata)) - current_metadata = std::move(new_metadata); - else if (!updated) - return {Storage::configuration, Storage::object_storage}; + if (current_metadata && *current_metadata == *new_metadata) + return; + current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); /// If metadata wasn't changed, we won't list data files again. 
updated_configuration->getPaths() = current_metadata->getDataFiles(); Storage::configuration = updated_configuration; - - return {Storage::configuration, Storage::object_storage}; } template diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index c80237b3055..731b05f4621 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -27,7 +27,7 @@ void StorageHDFSConfiguration::check(ContextPtr context) const checkHDFSURL(url); } -ObjectStoragePtr StorageHDFSConfiguration::createOrUpdateObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { UNUSED(is_readonly); auto settings = std::make_unique(); @@ -42,16 +42,13 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr, bool /* with_str if (args.size() > 1) format_name = checkAndGetLiteralArgument(args[1], "format_name"); - if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url, true); - String compression_method; if (args.size() == 3) compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); else compression_method = "auto"; - } + } #endif diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 03fb0824123..1013c2e00c2 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -26,7 +26,7 @@ public: String getDataSourceDescription() override { return url; } void check(ContextPtr context) const override; - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection &) override {} diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index a3e19b907bc..a0e719878ac 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -10,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; } @@ -30,14 +31,15 @@ ReadBufferIterator::ReadBufferIterator( , query_settings(query_settings_) , schema_cache(schema_cache_) , read_keys(read_keys_) + , format(configuration->format.empty() || configuration->format == "auto" ? 
std::nullopt : std::optional(configuration->format)) , prev_read_keys_size(read_keys_.size()) { } -SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path) const +SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path, const String & format_name) const { auto source = fs::path(configuration->getDataSourceDescription()) / path; - return DB::getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); + return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); } SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const @@ -51,7 +53,7 @@ SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const { return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; }); - return DB::getKeysForSchemaCache(sources, configuration->format, format_settings, getContext()); + return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); } std::optional ReadBufferIterator::tryGetColumnsFromCache( @@ -75,10 +77,29 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( } }; - auto cache_key = getKeyForSchemaCache(object_info->relative_path); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + chassert(object_info); + if (format) + { + auto cache_key = getKeyForSchemaCache(object_info->relative_path, *format); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(object_info->relative_path, format_name); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. 
+ format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -86,16 +107,18 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) { + chassert(current_object_info); if (query_settings.schema_inference_use_cache) - schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path), num_rows); + schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path, *format), num_rows); } void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) { + chassert(current_object_info); if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) { - schema_cache.addColumns(getKeyForSchemaCache(current_object_info->relative_path), columns); + schema_cache.addColumns(getKeyForSchemaCache(current_object_info->relative_path, *format), columns); } } @@ -108,6 +131,11 @@ void ReadBufferIterator::setResultingSchema(const ColumnsDescription & columns) } } +void ReadBufferIterator::setFormatName(const String & format_name) +{ + format = format_name; +} + String ReadBufferIterator::getLastFileName() const { if (current_object_info) @@ -116,64 +144,128 @@ String ReadBufferIterator::getLastFileName() const return ""; } -std::pair, std::optional> ReadBufferIterator::next() +std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() { - /// For default mode check cached columns for currently read keys on first iteration. - if (first && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; - } + chassert(current_object_info); - current_object_info = file_iterator->next(0); - if (!current_object_info || current_object_info->relative_path.empty()) + auto impl = object_storage->readObject( + StoredObject(current_object_info->relative_path), getContext()->getReadSettings()); + + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + return wrapReadBufferWithCompressionMethod( + std::move(impl), chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + zstd_window_log_max); +} + +ReadBufferIterator::Data ReadBufferIterator::next() +{ + if (first) { - if (first) + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. + if (!format) { - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, " - "because there are no files with provided path. " - "You must specify table structure manually", - configuration->format); + for (const auto & object_info : read_keys) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->relative_path)) + { + format = format_from_file_name; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns, format}; } - return {nullptr, std::nullopt}; } - first = false; - - /// File iterator could get new keys after new iteration, - /// check them in schema cache if schema inference mode is default. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT - && read_keys.size() > prev_read_keys_size) + while (true) { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; + current_object_info = file_iterator->next(0); + + if (!current_object_info || current_object_info->relative_path.empty()) + { + if (first) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "in S3 or all files are empty. You can specify table structure manually", + *format); + + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "in S3 or all files are empty. You can specify the format manually"); + } + + return {nullptr, std::nullopt, format}; + } + + /// S3 file iterator could get new keys after new iteration + if (read_keys.size() > prev_read_keys_size) + { + /// If format is unknown we can try to determine it by new file names. + if (!format) + { + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->relative_path)) + { + format = format_from_file_name; + break; + } + } + } + + /// Check new files in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; + } + + prev_read_keys_size = read_keys.size(); + } + + if (getContext()->getSettingsRef().s3_skip_empty_files + && current_object_info->metadata && current_object_info->metadata->size_bytes == 0) + continue; + + /// In union mode, check cached columns only for current key. 
+ if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + { + ObjectInfos objects{current_object_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(objects.begin(), objects.end())) + { + first = false; + return {nullptr, columns_from_cache, format}; + } + } + + std::unique_ptr read_buffer = object_storage->readObject( + StoredObject(current_object_info->relative_path), + getContext()->getReadSettings(), + {}, + current_object_info->metadata->size_bytes); + + if (!getContext()->getSettingsRef().s3_skip_empty_files || !read_buffer->eof()) + { + first = false; + + read_buffer = wrapReadBufferWithCompressionMethod( + std::move(read_buffer), + chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + + return {std::move(read_buffer), std::nullopt, format}; + } } - else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - ObjectInfos paths = {current_object_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache}; - } - - first = false; - - chassert(current_object_info->metadata); - std::unique_ptr read_buffer = object_storage->readObject( - StoredObject(current_object_info->relative_path), - getContext()->getReadSettings(), - {}, - current_object_info->metadata->size_bytes); - - read_buffer = wrapReadBufferWithCompressionMethod( - std::move(read_buffer), - chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), - static_cast(getContext()->getSettingsRef().zstd_window_log_max)); - - return {std::move(read_buffer), std::nullopt}; } - } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 4e9b8cfcfca..053bcbf894f 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -23,7 +24,7 @@ public: ObjectInfos & read_keys_, const ContextPtr & context_); - std::pair, std::optional> next() override; + Data next() override; void setNumRowsToLastFile(size_t num_rows) override; @@ -33,8 +34,14 @@ public: String getLastFileName() const override; + void setFormatName(const String & format_name) override; + + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override; + private: - SchemaCache::Key getKeyForSchemaCache(const String & path) const; + SchemaCache::Key getKeyForSchemaCache(const String & path, const String & format_name) const; SchemaCache::Keys getPathsForSchemaCache() const; std::optional tryGetColumnsFromCache( const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); @@ -46,6 +53,7 @@ private: const StorageObjectStorageSettings query_settings; SchemaCache & schema_cache; ObjectInfos & read_keys; + std::optional format; size_t prev_read_keys_size; ObjectInfoPtr current_object_info; diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index f057745d669..896131e74d7 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -58,106 +59,47 @@ void StorageS3Configuration::check(ContextPtr context) const 
StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) { url = other.url; - auth_settings = other.auth_settings; - request_settings = other.request_settings; static_configuration = other.static_configuration; headers_from_ast = other.headers_from_ast; keys = other.keys; - initialized = other.initialized; format = other.format; compression_method = other.compression_method; structure = other.structure; } -ObjectStoragePtr StorageS3Configuration::createOrUpdateObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT +ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT { - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); + const auto & config = context->getConfigRef(); + const std::string config_prefix = "s3."; - if (!initialized || (!static_configuration && auth_settings.hasUpdates(s3_settings.auth_settings))) + auto s3_settings = getSettings(config, config_prefix, context); + + auth_settings.updateFrom(s3_settings->auth_settings); + s3_settings->auth_settings = auth_settings; + s3_settings->request_settings = request_settings; + + if (!headers_from_ast.empty()) { - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = url.key; - initialized = true; + s3_settings->auth_settings.headers.insert( + s3_settings->auth_settings.headers.end(), + headers_from_ast.begin(), headers_from_ast.end()); } - const auto & config = context->getConfigRef(); + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString())) + s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + + auto client = getClient(config, config_prefix, context, *s3_settings, false, &url); + auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); auto s3_capabilities = S3Capabilities { .support_batch_delete = config.getBool("s3.support_batch_delete", true), .support_proxy = config.getBool("s3.support_proxy", config.has("s3.proxy")), }; - auto s3_storage_settings = std::make_unique( - request_settings, - config.getUInt64("s3.min_bytes_for_seek", 1024 * 1024), - config.getInt("s3.list_object_keys_size", 1000), - config.getInt("s3.objects_chunk_size_to_delete", 1000), - config.getBool("s3.readonly", false)); - - auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); - auto client = createClient(context); - std::string disk_name = "StorageS3"; - return std::make_shared( - std::move(client), std::move(s3_storage_settings), url, s3_capabilities, key_generator, /*disk_name*/disk_name); -} - -std::unique_ptr StorageS3Configuration::createClient(ContextPtr context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - auto client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - client_configuration.maxConnections = static_cast(request_settings.max_connections); - 
client_configuration.http_connection_pool_size = global_settings.s3_http_connection_pool_size; - - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - }; - - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, - auth_settings.secret_access_key, - auth_settings.session_token); - - auto credentials_configuration = S3::CredentialsConfiguration - { - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }; - - return S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - auth_settings.server_side_encryption_kms_config, - std::move(headers), - credentials_configuration); + std::move(client), std::move(s3_settings), url, s3_capabilities, + key_generator, "StorageS3", false, headers_from_ast); } void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection) @@ -185,10 +127,6 @@ void StorageS3Configuration::fromNamedCollection(const NamedCollection & collect static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); keys = {url.key}; - - //if (format == "auto" && get_format_from_file) - if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(url.key, true); } void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_structure) @@ -386,10 +324,6 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_ auth_settings.no_sign_request = no_sign_request; keys = {url.key}; - - // if (format == "auto" && get_format_from_file) - if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(url.key, true); } void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 037cf2eae87..88a084f29b3 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -27,27 +27,25 @@ public: String getDataSourceDescription() override; void check(ContextPtr context) const override; - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + bool isStaticConfiguration() const override { return static_configuration; } - void fromNamedCollection(const NamedCollection & collection) override; - void fromAST(ASTs & args, ContextPtr context, bool 
with_structure) override; + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context); private: + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + S3::URI url; + std::vector keys; + S3::AuthSettings auth_settings; S3Settings::RequestSettings request_settings; + HTTPHeaderEntries headers_from_ast; /// Headers from ast is a part of static configuration. /// If s3 configuration was passed from ast, then it is static. /// If from config - it can be changed with config reload. bool static_configuration = true; - /// Headers from ast is a part of static configuration. - HTTPHeaderEntries headers_from_ast; - std::vector keys; - - std::unique_ptr createClient(ContextPtr context); - - bool initialized = false; }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 2e834da5529..7337a528a76 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -13,8 +14,9 @@ #include #include #include -#include #include +#include +#include namespace DB @@ -39,21 +41,24 @@ std::unique_ptr getStorageMetadata( const std::string & engine_name, const ContextPtr & context) { + using Storage = StorageObjectStorage; + auto storage_metadata = std::make_unique(); if (columns.empty()) { - auto fetched_columns = StorageObjectStorage::getTableStructureFromData( - object_storage, configuration, format_settings, context); + auto fetched_columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context); storage_metadata->setColumns(fetched_columns); } + else if (!columns.hasOnlyOrdinary()) + { + /// We don't allow special columns. + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine {} doesn't support special columns " + "like MATERIALIZED, ALIAS or EPHEMERAL", engine_name); + } else { - /// We don't allow special columns. 
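// A minimal sketch of the schema-resolution order the restructured
// getStorageMetadata() follows: infer columns when none are declared, reject
// special column kinds, otherwise take the user-declared list (and, per the
// branch added below, detect the format first if it is still "auto"). The
// helper and type names here are illustrative.
#include <stdexcept>
#include <string>
#include <vector>

enum class ColumnKind { Ordinary, Materialized, Alias, Ephemeral };

struct ColumnSketch { std::string name; ColumnKind kind = ColumnKind::Ordinary; };

std::vector<ColumnSketch> resolveColumns(std::vector<ColumnSketch> declared, const std::vector<ColumnSketch> & inferred)
{
    if (declared.empty())
        return inferred; /// schema inference path
    for (const auto & col : declared)
        if (col.kind != ColumnKind::Ordinary)
            throw std::invalid_argument("special column not supported: " + col.name);
    return declared; /// user-declared schema wins when present
}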
- if (!columns.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Table engine {} doesn't support special columns " - "like MATERIALIZED, ALIAS or EPHEMERAL", - engine_name); + if (configuration->format == "auto") + Storage::setFormatFromData(object_storage, configuration, format_settings, context); storage_metadata->setColumns(columns); } @@ -120,14 +125,10 @@ bool StorageObjectStorage::parallelizeOutputAfterReading(Contex } template -std::pair -StorageObjectStorage::updateConfigurationAndGetCopy(ContextPtr local_context) +void StorageObjectStorage::updateConfiguration(ContextPtr context) { - std::lock_guard lock(configuration_update_mutex); - auto new_object_storage = configuration->createOrUpdateObjectStorage(local_context); - if (new_object_storage) - object_storage = new_object_storage; - return {configuration, object_storage}; + if (!configuration->isStaticConfiguration()) + object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); } template @@ -151,8 +152,8 @@ void StorageObjectStorage::read( size_t max_block_size, size_t num_streams) { - auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); - if (partition_by && query_configuration->withWildcard()) + updateConfiguration(local_context); + if (partition_by && configuration->withWildcard()) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned {} storage is not implemented yet", @@ -165,8 +166,8 @@ void StorageObjectStorage::read( && local_context->getSettingsRef().optimize_count_from_files; auto read_step = std::make_unique( - query_object_storage, - query_configuration, + object_storage, + configuration, getName(), virtual_columns, format_settings, @@ -192,10 +193,10 @@ SinkToStoragePtr StorageObjectStorage::write( ContextPtr local_context, bool /* async_insert */) { - auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); + updateConfiguration(local_context); const auto sample_block = metadata_snapshot->getSampleBlock(); - if (query_configuration->withWildcard()) + if (configuration->withWildcard()) { ASTPtr partition_by_ast = nullptr; if (auto insert_query = std::dynamic_pointer_cast(query)) @@ -209,24 +210,28 @@ SinkToStoragePtr StorageObjectStorage::write( if (partition_by_ast) { return std::make_shared( - object_storage, query_configuration, format_settings, sample_block, local_context, partition_by_ast); + object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); } } - if (query_configuration->withGlobs()) + if (configuration->withGlobs()) { throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "{} key '{}' contains globs, so the table is in readonly mode", - getName(), query_configuration->getPath()); + getName(), configuration->getPath()); } const auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); + + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII: {}", object_storage->exists(StoredObject(configuration->getPath()))); + auto configuration_copy = configuration->clone(); if (!storage_settings.truncate_on_insert - && object_storage->exists(StoredObject(query_configuration->getPath()))) + && object_storage->exists(StoredObject(configuration->getPath()))) { + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII 2: {}", storage_settings.create_new_file_on_insert); if (storage_settings.create_new_file_on_insert) { - auto & paths = query_configuration->getPaths(); + auto & paths = configuration_copy->getPaths(); size_t index = 
paths.size(); const auto & first_key = paths[0]; auto pos = first_key.find_first_of('.'); @@ -243,6 +248,7 @@ SinkToStoragePtr StorageObjectStorage::write( while (object_storage->exists(StoredObject(new_key))); paths.push_back(new_key); + configuration->getPaths().push_back(new_key); } else { @@ -251,12 +257,13 @@ SinkToStoragePtr StorageObjectStorage::write( "Object in bucket {} with key {} already exists. " "If you want to overwrite it, enable setting [engine_name]_truncate_on_insert, if you " "want to create a new file on each insert, enable setting [engine_name]_create_new_file_on_insert", - query_configuration->getNamespace(), query_configuration->getPaths().back()); + configuration_copy->getNamespace(), configuration_copy->getPaths().back()); } } + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII 3: {}", configuration_copy->getPaths().size()); return std::make_shared( - object_storage, query_configuration, format_settings, sample_block, local_context); + object_storage, configuration_copy, format_settings, sample_block, local_context); } template @@ -279,25 +286,55 @@ void StorageObjectStorage::truncate( } template -ColumnsDescription StorageObjectStorage::getTableStructureFromData( - ObjectStoragePtr object_storage, +std::unique_ptr StorageObjectStorage::createReadBufferIterator( + const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, - ContextPtr context) + ObjectInfos & read_keys, + const ContextPtr & context) { - ObjectInfos read_keys; const auto settings = StorageSettings::create(context->getSettingsRef()); auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, object_storage, /* distributed_processing */false, context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size, StorageSettings::ObjectStorageThreads(), StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); - ReadBufferIterator read_buffer_iterator( + return std::make_unique( object_storage, configuration, file_iterator, format_settings, StorageSettings::create(context->getSettingsRef()), getSchemaCache(context), read_keys, context); +} - const bool retry = configuration->withGlobs(); - return readSchemaFromFormat(configuration->format, format_settings, read_buffer_iterator, retry, context); +template +ColumnsDescription StorageObjectStorage::getTableStructureFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + if (configuration->format == "auto") + { + auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); + configuration->format = format; + return columns; + } + else + { + return readSchemaFromFormat(configuration->format, format_settings, *read_buffer_iterator, context); + } +} + +template +void StorageObjectStorage::setFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + configuration->format = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, 
context).second; } template class StorageObjectStorage; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 6f18153c7af..64c4c74ab22 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -21,6 +21,7 @@ using ReadTaskCallback = std::function; class IOutputFormat; class IInputFormat; class SchemaCache; +class ReadBufferIterator; template @@ -89,13 +90,26 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & context); static ColumnsDescription getTableStructureFromData( - ObjectStoragePtr object_storage, + const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, - ContextPtr context); + const ContextPtr & context); + + static void setFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); protected: - virtual std::pair updateConfigurationAndGetCopy(ContextPtr local_context); + virtual void updateConfiguration(ContextPtr local_context); + + static std::unique_ptr createReadBufferIterator( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ObjectInfos & read_keys, + const ContextPtr & context); const std::string engine_name; const NamesAndTypesList virtual_columns; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index f0d9ea400c4..2bd2c022aa8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -33,12 +33,10 @@ StorageObjectStorageCluster::Storage const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) + ContextPtr context_) : IStorageCluster(cluster_name_, table_id_, - getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name)), - structure_argument_was_provided_) + getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name))) , engine_name(engine_name_) , configuration{configuration_} , object_storage(object_storage_) @@ -48,13 +46,16 @@ StorageObjectStorageCluster::Storage if (columns_.empty()) { - /// `format_settings` is set to std::nullopt, because StorageObjectStorageCluster is used only as table function - auto columns = StorageObjectStorage::getTableStructureFromData( - object_storage, configuration, /*format_settings=*/std::nullopt, context_); + ColumnsDescription columns = Storage::getTableStructureFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); storage_metadata.setColumns(columns); } else + { + if (configuration->format == "auto") + StorageS3::setFormatFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); + storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); @@ -64,9 +65,9 @@ StorageObjectStorageCluster::Storage } template -void StorageObjectStorageCluster::addColumnsStructureToQuery( +void StorageObjectStorageCluster::updateQueryToSendIfNeeded( ASTPtr & query, - const String & structure, + const DB::StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) { ASTExpressionList * expression_list = 
extractTableFunctionArgumentsFromSelectQuery(query); @@ -76,13 +77,18 @@ void StorageObjectStorageCluster::ad "Expected SELECT query from table function {}, got '{}'", engine_name, queryToString(query)); } - using TableFunction = TableFunctionObjectStorageCluster; - TableFunction::addColumnsStructureToArguments(expression_list->children, structure, context); + + TableFunction::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, + storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), + configuration->format, + context); } template RemoteQueryExecutor::Extension -StorageObjectStorageCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & local_context) const +StorageObjectStorageCluster::getTaskIteratorExtension( + const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { const auto settings = StorageSettings::create(local_context->getSettingsRef()); auto iterator = std::make_shared( diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 507de20e888..5d77d4ced60 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -21,6 +21,7 @@ class StorageObjectStorageCluster : public IStorageCluster { public: using Storage = StorageObjectStorage; + using TableFunction = TableFunctionObjectStorageCluster; StorageObjectStorageCluster( const String & cluster_name_, @@ -30,8 +31,7 @@ public: const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); + ContextPtr context_); std::string getName() const override { return engine_name; } @@ -49,9 +49,9 @@ public: private: void updateBeforeRead(const ContextPtr & /* context */) override {} - void addColumnsStructureToQuery( + void updateQueryToSendIfNeeded( ASTPtr & query, - const String & structure, + const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; const String & engine_name; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 651f1d25ec1..a1c7d468fa6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -1,5 +1,5 @@ #include - +#include namespace DB { @@ -14,6 +14,10 @@ void StorageObjectStorageConfiguration::initialize( configuration.fromNamedCollection(*named_collection); else configuration.fromAST(engine_args, local_context, with_table_structure); + + // FIXME: it should be - if (format == "auto" && get_format_from_file) + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); } bool StorageObjectStorageConfiguration::withWildcard() const diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 04b2d8e8fd9..2da262eb55d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -39,8 +39,9 @@ public: std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; - virtual ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) 
= 0; /// NOLINT + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT virtual StorageObjectStorageConfigurationPtr clone() = 0; + virtual bool isStaticConfiguration() const { return true; } String format = "auto"; String compression_method = "auto"; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 14e59312c8c..3b503fd4f0c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -5,10 +5,14 @@ #include #include #include +#include namespace DB { + +class SchemaCache; + class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { friend class StorageS3QueueSource; diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index e23457c04e9..3271b766f68 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -56,7 +56,7 @@ static std::shared_ptr> createStorageObjec return std::make_shared>( configuration, - configuration->createOrUpdateObjectStorage(context), + configuration->createObjectStorage(context), engine_name, args.getContext(), args.table_id, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 2673aa94347..bd526ad687b 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -134,7 +134,7 @@ StorageS3Queue::StorageS3Queue( checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); - object_storage = configuration->createOrUpdateObjectStorage(context_); + object_storage = configuration->createObjectStorage(context_); FormatFactory::instance().checkFormatName(configuration->format); configuration->check(context_); @@ -146,8 +146,10 @@ StorageS3Queue::StorageS3Queue( } else { - if (configuration.format == "auto") - configuration.format = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_).second; + if (configuration->format == "auto") + { + StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_); + } storage_metadata.setColumns(columns_); } diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index b0c1160429a..8510a6e4bdd 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings) { - updateFromSettingsImpl(settings, false); + updateFromSettings(settings, false); validate(); } @@ -66,7 +66,7 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedC validate(); } -void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) +void S3Settings::RequestSettings::PartUploadSettings::updateFromSettings(const Settings & settings, bool if_changed) { if (!if_changed || settings.s3_strict_upload_part_size.changed) strict_upload_part_size = settings.s3_strict_upload_part_size; @@ -263,13 +263,12 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin request_timeout_ms = settings.s3_request_timeout_ms; } -void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) +void 
S3Settings::RequestSettings::updateFromSettingsIfChanged(const Settings & settings) { updateFromSettingsImpl(settings, true); - upload_settings.updateFromSettings(settings); + upload_settings.updateFromSettings(settings, true); } - void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings) { std::lock_guard lock(mutex); @@ -293,7 +292,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint) const +std::optional StorageS3Settings::getSettings(const String & endpoint) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 0e152bb2d31..a4bc9f0b5cf 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -39,7 +39,7 @@ struct S3Settings size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024; String storage_class_name; - void updateFromSettings(const Settings & settings) { updateFromSettingsImpl(settings, true); } + void updateFromSettings(const Settings & settings, bool if_changed); void validate(); private: @@ -52,8 +52,6 @@ struct S3Settings const Settings & settings, String setting_name_prefix = {}); - void updateFromSettingsImpl(const Settings & settings, bool if_changed); - friend struct RequestSettings; }; @@ -96,7 +94,7 @@ struct S3Settings const Settings & settings, String setting_name_prefix = {}); - void updateFromSettings(const Settings & settings); + void updateFromSettingsIfChanged(const Settings & settings); private: void updateFromSettingsImpl(const Settings & settings, bool if_changed); @@ -112,7 +110,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint) const; + std::optional getSettings(const String & endpoint) const; private: mutable std::mutex mutex; diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index a48c95469d0..b07b328eed9 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -31,7 +31,7 @@ ObjectStoragePtr TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const { if (!object_storage) - object_storage = configuration->createOrUpdateObjectStorage(context, create_readonly); + object_storage = configuration->createObjectStorage(context, create_readonly); return object_storage; } @@ -63,8 +63,8 @@ std::vector TableFunctionObjectStorage< } template -void TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( + ASTs & args, const String & structure, const String & /* format */, const ContextPtr & context) { Configuration::addStructureToArgs(args, structure, context); } diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index 5e180301862..9022f6e577f 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -110,7 +110,11 @@ public: 
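// A minimal sketch of what the renamed updateStructureAndFormatArgumentsIfNeeded()
// is for: once the initiator has resolved a structure and a format locally, the
// serialized table-function arguments are rewritten so that remote cluster nodes
// do not repeat schema/format detection. The positional layout and names below
// are illustrative, not the real AST manipulation.
#include <string>
#include <vector>

/// Assumed argument layout: {url, format, structure, ...}.
void updateArgsSketch(std::vector<std::string> & args, const std::string & resolved_format, const std::string & resolved_structure)
{
    if (args.size() > 1 && args[1] == "auto")
        args[1] = resolved_format;
    if (args.size() > 2 && args[2] == "auto")
        args[2] = resolved_structure;
}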
virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); + static void updateStructureAndFormatArgumentsIfNeeded( + ASTs & args, + const String & structure, + const String & format, + const ContextPtr & context); protected: using ConfigurationPtr = StorageObjectStorageConfigurationPtr; diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 5a29a693431..55b41cf6ca8 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -20,12 +20,10 @@ StoragePtr TableFunctionObjectStorageCluster; - auto configuration = Base::getConfiguration(); - bool structure_argument_was_provided = configuration->structure != "auto"; ColumnsDescription columns; - if (structure_argument_was_provided) + if (configuration->structure != "auto") columns = parseColumnsListFromString(configuration->structure, context); else if (!Base::structure_hint.empty()) columns = Base::structure_hint; @@ -58,8 +56,7 @@ StoragePtr TableFunctionObjectStorageClusterstartup(); From 2e9b6545b6f060e1fa92970276116734f483f417 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 19 Feb 2024 18:24:23 +0100 Subject: [PATCH 024/651] Fix --- src/Disks/ObjectStorages/S3/diskSettings.cpp | 16 ++++++------- src/Storages/Cache/SchemaCache.cpp | 1 - .../ObjectStorage/StorageObjectStorage.cpp | 3 --- .../StorageObjectStorageCluster.cpp | 3 ++- .../StorageObjectStorageSource.cpp | 24 ++++++++++--------- .../TableFunctionObjectStorageCluster.cpp | 2 +- 6 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index cb2bb690292..43b1cffb3e6 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -27,12 +27,8 @@ std::unique_ptr getSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { const Settings & settings = context->getSettingsRef(); - S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_"); - /// TODO: add request settings prefix, becausse for StorageS3 it should be "s3." 
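// The replacement lines just below turn a default-construct-then-mutate
// sequence into a static factory call (S3::AuthSettings::loadFromConfig).
// A minimal sketch of that pattern; ConfigSketch is a stand-in for
// Poco::Util::AbstractConfiguration and the field set is illustrative.
#include <map>
#include <string>

struct ConfigSketch
{
    std::map<std::string, std::string> values;

    std::string getString(const std::string & key, const std::string & def) const
    {
        auto it = values.find(key);
        return it == values.end() ? def : it->second;
    }
};

struct AuthSettingsSketch
{
    std::string region;

    static AuthSettingsSketch loadFromConfig(const std::string & prefix, const ConfigSketch & config)
    {
        AuthSettingsSketch res;
        res.region = config.getString(prefix + ".region", "");
        return res; /// NRVO: no extra copy versus the mutating form
    }
};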
- - S3::AuthSettings auth_settings; - auth_settings.loadFromConfig(config_prefix, config); - + auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_"); + auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); return std::make_unique( request_settings, auth_settings, @@ -60,9 +56,9 @@ std::unique_ptr getClient( if (for_disk_s3) { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - S3::URI uri(endpoint); - if (!uri.key.ends_with('/')) - uri.key.push_back('/'); + url = S3::URI(endpoint); + if (!url.key.ends_with('/')) + url.key.push_back('/'); } else { @@ -123,6 +119,8 @@ std::unique_ptr getClient( auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), }; + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {} - {}", auth_settings.access_key_id, auth_settings.secret_access_key); + return S3::ClientFactory::instance().create( client_configuration, client_settings, diff --git a/src/Storages/Cache/SchemaCache.cpp b/src/Storages/Cache/SchemaCache.cpp index 35fb8d348ef..5dc39f04ae0 100644 --- a/src/Storages/Cache/SchemaCache.cpp +++ b/src/Storages/Cache/SchemaCache.cpp @@ -110,7 +110,6 @@ std::optional SchemaCache::tryGetImpl(const Key & key, } ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheHits); - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {}", StackTrace().toString()); auto & schema_info = it->second.schema_info; auto & queue_iterator = it->second.iterator; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 7337a528a76..30f5c36879c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -223,12 +223,10 @@ SinkToStoragePtr StorageObjectStorage::write( const auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII: {}", object_storage->exists(StoredObject(configuration->getPath()))); auto configuration_copy = configuration->clone(); if (!storage_settings.truncate_on_insert && object_storage->exists(StoredObject(configuration->getPath()))) { - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII 2: {}", storage_settings.create_new_file_on_insert); if (storage_settings.create_new_file_on_insert) { auto & paths = configuration_copy->getPaths(); @@ -260,7 +258,6 @@ SinkToStoragePtr StorageObjectStorage::write( configuration_copy->getNamespace(), configuration_copy->getPaths().back()); } } - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII 3: {}", configuration_copy->getPaths().size()); return std::make_shared( object_storage, configuration_copy, format_settings, sample_block, local_context); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 2bd2c022aa8..9b98051086d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -92,7 +92,8 @@ StorageObjectStorageCluster::getTask { const auto settings = StorageSettings::create(local_context->getSettingsRef()); auto iterator = std::make_shared( - object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size); + object_storage, configuration, predicate, virtual_columns, local_context, + nullptr, settings.list_object_keys_size, 
local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index a8bde4cd56f..d91850bf99c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -362,9 +362,9 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } else { - const auto key_with_globs = configuration_->getPath(); - auto object_metadata = object_storage->getObjectMetadata(key_with_globs); - auto object_info = std::make_shared(key_with_globs, object_metadata); + const auto object_key = configuration_->getPath(); + auto object_metadata = object_storage->getObjectMetadata(object_key); + auto object_info = std::make_shared(object_key, object_metadata); object_infos.emplace_back(object_info); if (read_keys) @@ -381,12 +381,11 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor { std::lock_guard lock(next_mutex); - if (is_finished) + bool current_batch_processed = object_infos.empty() || index >= object_infos.size(); + if (is_finished && current_batch_processed) return {}; - bool need_new_batch = object_infos.empty() || index >= object_infos.size(); - - if (need_new_batch) + if (current_batch_processed) { ObjectInfos new_batch; while (new_batch.empty()) @@ -439,11 +438,10 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor } } - size_t current_index = index++; - if (current_index >= object_infos.size()) + if (index >= object_infos.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return object_infos[current_index]; + return object_infos[index++]; } StorageObjectStorageSource::KeysIterator::KeysIterator( @@ -532,7 +530,11 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( pool.wait(); buffer.reserve(max_threads_count); for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); + { + auto key = key_future.get(); + if (!key.empty()) + buffer.emplace_back(std::make_shared(key, std::nullopt)); + } } ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::next(size_t) diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 55b41cf6ca8..4ec94cfaf7c 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -43,7 +43,7 @@ StoragePtr TableFunctionObjectStorageCluster Date: Mon, 19 Feb 2024 20:29:22 +0100 Subject: [PATCH 025/651] Fix style check --- src/Disks/ObjectStorages/S3/diskSettings.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 43b1cffb3e6..6fec4758456 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -22,6 +22,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} std::unique_ptr getSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) From d88f8646b180f0ca4fec7bab5c9c9c7cc7574c0c Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 11:03:12 +0100 Subject: [PATCH 026/651] Fix after merge with master --- src/Coordination/Standalone/Context.cpp | 15 +++++++++++++++ 
src/Coordination/Standalone/Context.h | 3 +++ src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/S3/Configuration.cpp | 2 +- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 374610769c4..c16ecbfd5c3 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -77,6 +77,8 @@ struct ContextSharedPart : boost::noncopyable mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes + + std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage }; ContextData::ContextData() = default; @@ -382,4 +384,17 @@ std::shared_ptr Context::getZooKeeper() const throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); } +const StorageS3Settings & Context::getStorageS3Settings() const +{ + std::lock_guard lock(shared->mutex); + + if (!shared->storage_s3_settings) + { + const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config(); + shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef()); + } + + return *shared->storage_s3_settings; +} + } diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index 49ad2b568fe..3346a865f0f 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -36,6 +36,7 @@ class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class BlobStorageLog; class IOUringReader; +class StorageS3Settings; /// A small class which owns ContextShared. /// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. 
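// A minimal sketch of the lazy, lock-guarded member the standalone Context
// gains in this commit: an std::optional populated on first access while the
// shared mutex is held, so Keeper only pays for S3 settings when they are
// actually requested. Types are simplified and the TSA annotations are omitted.
#include <mutex>
#include <optional>

struct S3SettingsSketch { int list_object_keys_size = 1000; };

class ContextSketch
{
    mutable std::mutex mutex;
    mutable std::optional<S3SettingsSketch> s3_settings;

public:
    const S3SettingsSketch & getS3Settings() const
    {
        std::lock_guard lock(mutex);
        if (!s3_settings)
            s3_settings.emplace(); /// load from config here on first use
        return *s3_settings;       /// write-once, so the reference stays valid
    }
};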
@@ -160,6 +161,8 @@ public: void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); zkutil::ZooKeeperPtr getZooKeeper() const; + + const StorageS3Settings & getStorageS3Settings() const; }; } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a75a747f334..0869e2ebbd2 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -552,7 +552,7 @@ void S3ObjectStorage::applyNewSettings( static_headers.begin(), static_headers.end()); } - if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString())) + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName())) new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); auto current_s3_settings = s3_settings.get(); diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 896131e74d7..47e7ebd53a6 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -86,7 +86,7 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, headers_from_ast.begin(), headers_from_ast.end()); } - if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString())) + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName())) s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); auto client = getClient(config, config_prefix, context, *s3_settings, false, &url); From 94c44cefc89fbb471505aedd803600bc8ace7a49 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 16:24:23 +0100 Subject: [PATCH 027/651] Fix clang tidy --- src/Storages/ObjectStorage/AzureBlob/Configuration.cpp | 5 +---- src/Storages/ObjectStorage/HDFS/Configuration.cpp | 4 +--- src/Storages/ObjectStorage/S3/Configuration.cpp | 5 +---- .../ObjectStorage/StorageObjectStorageConfiguration.cpp | 7 +++++++ .../ObjectStorage/StorageObjectStorageConfiguration.h | 1 + 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 9d21541e7e2..7a670441e72 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -78,6 +78,7 @@ void StorageAzureBlobConfiguration::check(ContextPtr context) const } StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) + : StorageObjectStorageConfiguration(other) { connection_url = other.connection_url; is_connection_string = other.is_connection_string; @@ -86,10 +87,6 @@ StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureB container = other.container; blob_path = other.blob_path; blobs_paths = other.blobs_paths; - - format = other.format; - compression_method = other.compression_method; - structure = other.structure; } AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 731b05f4621..2f2427edb24 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -12,13 +12,11 @@ namespace DB { 
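// A minimal sketch of the clang-tidy fix this commit applies across the
// configurations: each derived copy constructor now delegates to the base
// copy constructor instead of re-copying format/compression_method/structure
// by hand. Class names below are illustrative.
#include <string>

struct BaseConfigSketch
{
    std::string format = "auto";
    std::string compression_method = "auto";

    BaseConfigSketch() = default;
    BaseConfigSketch(const BaseConfigSketch & other) = default;
};

struct HDFSLikeConfigSketch : BaseConfigSketch
{
    std::string url;

    HDFSLikeConfigSketch(const HDFSLikeConfigSketch & other)
        : BaseConfigSketch(other) /// base members copied once, in one place
        , url(other.url)
    {
    }
};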
StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) + : StorageObjectStorageConfiguration(other) { url = other.url; path = other.path; paths = other.paths; - format = other.format; - compression_method = other.compression_method; - structure = other.structure; } void StorageHDFSConfiguration::check(ContextPtr context) const diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 47e7ebd53a6..1e14ccc4b31 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -57,15 +57,12 @@ void StorageS3Configuration::check(ContextPtr context) const } StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) + : StorageObjectStorageConfiguration(other) { url = other.url; static_configuration = other.static_configuration; headers_from_ast = other.headers_from_ast; keys = other.keys; - - format = other.format; - compression_method = other.compression_method; - structure = other.structure; } ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index a1c7d468fa6..8a4dee2c31b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -20,6 +20,13 @@ void StorageObjectStorageConfiguration::initialize( configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); } +StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other) +{ + format = other.format; + compression_method = other.compression_method; + structure = other.structure; +} + bool StorageObjectStorageConfiguration::withWildcard() const { static const String PARTITION_ID_WILDCARD = "{_partition_id}"; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 2da262eb55d..47afbc5d0c6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -12,6 +12,7 @@ class StorageObjectStorageConfiguration { public: StorageObjectStorageConfiguration() = default; + StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other); virtual ~StorageObjectStorageConfiguration() = default; using Path = std::string; From 6b5953859ec7fbd22728426e8110162b57b1b9aa Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 21 Feb 2024 17:59:11 +0100 Subject: [PATCH 028/651] Check for deserializeTextEscaped in other serializations, fix review changes --- .../SerializationAggregateFunction.cpp | 4 +- .../SerializationCustomSimpleText.cpp | 2 +- .../Serializations/SerializationEnum.cpp | 2 +- .../Serializations/SerializationObject.cpp | 4 +- .../Serializations/SerializationVariant.cpp | 4 +- src/IO/ReadHelpers.cpp | 42 ++++++++++++++----- src/IO/ReadHelpers.h | 1 - .../Impl/TabSeparatedRowInputFormat.cpp | 14 +++---- 8 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index c9af5d1f838..28a4fcf86da 100644 --- 
a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -150,10 +150,10 @@ void SerializationAggregateFunction::serializeTextEscaped(const IColumn & column } -void SerializationAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String s; - readEscapedString(s, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(s, istr) : readEscapedString(s, istr); deserializeFromString(function, column, s, version); } diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp index abe443cab1b..a3b0b088b17 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp @@ -75,7 +75,7 @@ void SerializationCustomSimpleText::serializeTextEscaped(const IColumn & column, void SerializationCustomSimpleText::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String str; - readEscapedString(str, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(str, istr) : readEscapedString(str, istr); deserializeFromString(*this, column, str, settings); } diff --git a/src/DataTypes/Serializations/SerializationEnum.cpp b/src/DataTypes/Serializations/SerializationEnum.cpp index 14b1a33e2ce..f44ae2fd4f9 100644 --- a/src/DataTypes/Serializations/SerializationEnum.cpp +++ b/src/DataTypes/Serializations/SerializationEnum.cpp @@ -29,7 +29,7 @@ void SerializationEnum::deserializeTextEscaped(IColumn & column, ReadBuffe { /// NOTE It would be nice to do without creating a temporary object - at least extract std::string out. std::string field_name; - readEscapedString(field_name, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field_name, istr) : readEscapedString(field_name, istr); assert_cast(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true)); } } diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index e6dc16ef5a0..9d0ff5903b1 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -105,9 +105,9 @@ void SerializationObject::deserializeWholeText(IColumn & column, ReadBuf } template -void SerializationObject::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationObject::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, [&](String & s) { readEscapedString(s, istr); }); + deserializeTextImpl(column, [&](String & s) { settings.tsv.crlf_end_of_line_input ? 
readEscapedStringCRLF(s, istr) : readEscapedString(s, istr); }); } template diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 5af94364167..a4e77b9c75f 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -604,14 +604,14 @@ void SerializationVariant::serializeTextEscaped(const IColumn & column, size_t r bool SerializationVariant::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String field; - readEscapedString(field, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field, istr) : readEscapedString(field, istr); return tryDeserializeTextEscapedImpl(column, field, settings); } void SerializationVariant::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String field; - readEscapedString(field, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field, istr) : readEscapedString(field, istr); if (!tryDeserializeTextEscapedImpl(column, field, settings)) throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse escaped value of type {} here: {}", variant_name, field); } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 53a7229e7d5..e763d627f40 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -537,8 +537,19 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) } } - if (*buf.position() == '\r') - ++buf.position(); + if constexpr (support_crlf) + { + if (*buf.position() == '\r') + { + ++buf.position(); + if (!buf.eof() && *buf.position() != '\n') + { + s.push_back('\r'); + continue; + } + return; + } + } } } @@ -555,11 +566,10 @@ void readEscapedString(String & s, ReadBuffer & buf) readEscapedStringInto(s, buf); } -template void readEscapedStringCRLF(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringInto(s, buf); + readEscapedStringInto(s, buf); } template void readEscapedStringInto,false>(PaddedPODArray & s, ReadBuffer & buf); @@ -567,9 +577,6 @@ template void readEscapedStringInto(NullOutput & s, ReadBuffer template void readEscapedStringInto,true>(PaddedPODArray & s, ReadBuffer & buf); template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); -template void readEscapedStringCRLF(String & s, ReadBuffer & buf); -template void readEscapedStringCRLF(String & s, ReadBuffer & buf); - /** If enable_sql_style_quoting == true, * strings like 'abc''def' will be parsed as abc'def. 
* Please note, that even with SQL style quoting enabled, @@ -1975,13 +1982,26 @@ bool tryReadJSONField(String & s, ReadBuffer & buf) return readParsedValueInto(s, buf, parse_func); } -template +template +void readTSVFieldImpl(String & s, ReadBuffer & buf) +{ + if constexpr (supports_crlf) + readEscapedStringIntoImpl(s, buf); + else + readEscapedStringIntoImpl(s, buf); +} + void readTSVField(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringIntoImpl(s, buf); + readTSVFieldImpl(s, buf); } -template void readTSVField(String & s, ReadBuffer & buf); -template void readTSVField(String & s, ReadBuffer & buf); +void readTSVFieldCRLF(String & s, ReadBuffer & buf) +{ + s.clear(); + readTSVFieldImpl(s, buf); +} + + } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 79014666ce1..3a20d2480b8 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -581,7 +581,6 @@ void readString(String & s, ReadBuffer & buf); void readEscapedString(String & s, ReadBuffer & buf); -template void readEscapedStringCRLF(String & s, ReadBuffer & buf); void readQuotedString(String & s, ReadBuffer & buf); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index dbd939effe1..c92cd1c39a0 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -24,17 +24,14 @@ namespace ErrorCodes /** Check for a common error case - usage of Windows line feed. */ -template static void checkForCarriageReturn(ReadBuffer & in) { - bool crlf_escaped = false; - if constexpr (supports_crlf) - crlf_escaped = true; - if (!in.eof() && (in.position()[0] == '\r' || (crlf_escaped ? false : (in.position() != in.buffer().begin() && in.position()[-1] == '\r')))) + if (!in.eof() && (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r'))) throw Exception(ErrorCodes::INCORRECT_DATA, "\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." " You must transform your file to Unix format." - "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r."); + "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r" + "\nor else enable setting 'input_format_tsv_crlf_end_of_line'"); } TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( @@ -104,7 +101,8 @@ void TabSeparatedFormatReader::skipRowEndDelimiter() } if (unlikely(first_row)) { - supports_crlf ? checkForCarriageReturn(*buf) : checkForCarriageReturn(*buf); + if (!supports_crlf) + checkForCarriageReturn(*buf); first_row = false; } assertChar('\n', *buf); @@ -120,7 +118,7 @@ String TabSeparatedFormatReader::readFieldIntoString() else { if constexpr (read_string) - support_crlf ? readEscapedStringCRLF(field, *buf) : readEscapedStringCRLF(field, *buf); + support_crlf ? readEscapedStringCRLF(field, *buf) : readEscapedString(field, *buf); else support_crlf ? 
readTSVField(field, *buf) : readTSVField(field, *buf); } From e60ead336ab14f0dfaba1a34022d0b9e0bbf82c2 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 21 Feb 2024 18:14:22 +0100 Subject: [PATCH 029/651] remove readEscapedStringInto function --- src/IO/ReadHelpers.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e763d627f40..dec8a14fae7 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1982,25 +1982,16 @@ bool tryReadJSONField(String & s, ReadBuffer & buf) return readParsedValueInto(s, buf, parse_func); } -template -void readTSVFieldImpl(String & s, ReadBuffer & buf) -{ - if constexpr (supports_crlf) - readEscapedStringIntoImpl(s, buf); - else - readEscapedStringIntoImpl(s, buf); -} - void readTSVField(String & s, ReadBuffer & buf) { s.clear(); - readTSVFieldImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } void readTSVFieldCRLF(String & s, ReadBuffer & buf) { s.clear(); - readTSVFieldImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } From e46c3c63fae5aa1d6ae17b53aa03e5b07ba5220b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 21 Feb 2024 18:24:58 +0100 Subject: [PATCH 030/651] check for return in skipRowEndDelimiter --- .../Formats/Impl/TabSeparatedRowInputFormat.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index c92cd1c39a0..93982526ddc 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -92,19 +92,20 @@ void TabSeparatedFormatReader::skipFieldDelimiter() void TabSeparatedFormatReader::skipRowEndDelimiter() { - bool supports_crlf = format_settings.tsv.crlf_end_of_line_input; if (buf->eof()) return; - if (supports_crlf && first_row==false) + + if (format_settings.tsv.crlf_end_of_line_input) { - ++buf->position(); + if (*buf->position() == '\r') + ++buf->position(); } - if (unlikely(first_row)) + else if (unlikely(first_row)) { - if (!supports_crlf) - checkForCarriageReturn(*buf); + checkForCarriageReturn(*buf); first_row = false; } + assertChar('\n', *buf); } From 4e3f2aae408fc8559304fe4f7c4a21db3d9202a6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 18:47:17 +0100 Subject: [PATCH 031/651] Fix keeper build --- src/Coordination/Standalone/Context.cpp | 1 + src/Coordination/Standalone/Context.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index c16ecbfd5c3..7e8711c7910 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index 3346a865f0f..943fcd106df 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -163,6 +163,8 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; const StorageS3Settings & getStorageS3Settings() const; + + const String & getUserName() const { static std::string user; return user; } }; } From 5f06c72bfc86b20e1ed50a255a121b6a334fa229 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 21 Feb 2024 20:36:10 +0100 Subject: [PATCH 032/651] check for return in deserializeTextEscaped implementations for SerializeBool and SerializeNullable --- 
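The recurring pattern in this commit (as in the TSV reader changes above): every
end-of-field predicate gains an optional '\r' case when CRLF input is enabled,
so "value\r\n" terminates a field exactly like "value\n". A minimal sketch of
that predicate; BufSketch is a simplified stand-in for ReadBuffer.

struct BufSketch
{
    const char * pos;
    const char * end;

    bool eof() const { return pos == end; }
    char peek() const { return *pos; }
};

/// True when the cursor sits on a TSV field/row boundary; '\r' only counts
/// as a boundary when CRLF support is enabled, mirroring the lambdas passed
/// to deserializeImpl() below.
bool atFieldEnd(const BufSketch & buf, bool support_crlf)
{
    if (buf.eof())
        return true;
    const char c = buf.peek();
    return c == '\t' || c == '\n' || (support_crlf && c == '\r');
}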
.../Serializations/SerializationBool.cpp | 7 +++- .../Serializations/SerializationNullable.cpp | 40 ++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index f745fac4d30..0cf9cb8be49 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -242,8 +242,11 @@ void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & is { if (istr.eof()) throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF."); - - deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); + + if (settings.tsv.crlf_end_of_line_input) + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == '\r'; }); + else + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); } bool SerializationBool::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 4d31451f92d..9e78b1285db 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -286,7 +286,7 @@ bool SerializationNullable::tryDeserializeNullRaw(DB::ReadBuffer & istr, const D } template -ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null) +ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null) { static constexpr bool throw_exception = std::is_same_v; @@ -319,13 +319,23 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, /// Check if we have enough data in buffer to check if it's a null. if (istr.available() > null_representation.size()) { - auto check_for_null = [&null_representation](ReadBuffer & buf) + auto check_for_null = [&null_representation, settings](ReadBuffer & buf) { auto * pos = buf.position(); - if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n')) - return true; - buf.position() = pos; - return false; + if (settings.tsv.crlf_end_of_line_input) + { + if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n'|| *buf.position() == '\r')) + return true; + buf.position() = pos; + return false; + } + else + { + if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n')) + return true; + buf.position() = pos; + return false; + } }; return deserializeImpl(column, istr, check_for_null, deserialize_nested, is_null); } @@ -334,14 +344,22 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, /// Use PeekableReadBuffer to make a checkpoint before checking null /// representation and rollback if check was failed. 
PeekableReadBuffer peekable_buf(istr, true); - auto check_for_null = [&null_representation](ReadBuffer & buf_) + auto check_for_null = [&null_representation, settings](ReadBuffer & buf_) { auto & buf = assert_cast(buf_); buf.setCheckpoint(); SCOPE_EXIT(buf.dropCheckpoint()); - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n')) - return true; + if (settings.tsv.crlf_end_of_line_input) + { + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == '\r')) + return true; + } + else + { + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n')) + return true; + } buf.rollbackToCheckpoint(); return false; }; @@ -372,6 +390,10 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " "containing '\\t' or '\\n' may not work correctly for large input."); + + if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos) + throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " + "containing '\\r' may not work correctly for large input."); WriteBufferFromOwnString parsed_value; if constexpr (escaped) From 80eb0c37826de63d9e2b595c62c37abbbb9c16ab Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 20:47:25 +0100 Subject: [PATCH 033/651] Fix for hdfs --- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 28 +++++++++++++------ src/Storages/HDFS/WriteBufferFromHDFS.cpp | 7 +++-- .../ObjectStorage/HDFS/Configuration.cpp | 14 +++++++--- .../ObjectStorage/ReadBufferIterator.cpp | 12 ++++---- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index fa5e227d853..360403b7f2d 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -36,10 +36,10 @@ ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & bool HDFSObjectStorage::exists(const StoredObject & object) const { - const auto & path = object.remote_path; - const size_t begin_of_path = path.find('/', path.find("//") + 2); - const String remote_fs_object_path = path.substr(begin_of_path); - return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); + // const auto & path = object.remote_path; + // const size_t begin_of_path = path.find('/', path.find("//") + 2); + // const String remote_fs_object_path = path.substr(begin_of_path); + return (0 == hdfsExists(hdfs_fs.get(), object.remote_path.c_str())); } std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT @@ -86,9 +86,12 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + auto path = object.remote_path.starts_with('/') ? object.remote_path.substr(1) : object.remote_path; + path = fs::path(hdfs_root_path) / path; + /// Single O_WRONLY in libhdfs adds O_TRUNC return std::make_unique( - object.remote_path, config, settings->replication, patchSettings(write_settings), buf_size, + path, config, settings->replication, patchSettings(write_settings), buf_size, mode == WriteMode::Rewrite ? 
O_WRONLY : O_WRONLY | O_APPEND); } @@ -124,11 +127,18 @@ void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects) removeObjectIfExists(object); } -ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const +ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const { - throw Exception( - ErrorCodes::UNSUPPORTED_METHOD, - "HDFS API doesn't support custom attributes/metadata for stored objects"); + auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); + if (!file_info) + throw Exception(ErrorCodes::HDFS_ERROR, "Cannot get file info for: {}. Error: {}", path, hdfsGetLastError()); + + ObjectMetadata metadata; + metadata.size_bytes = static_cast(file_info->mSize); + metadata.last_modified = file_info->mLastMod; + + hdfsFreeFileInfo(file_info, 1); + return metadata; } void HDFSObjectStorage::copyObject( /// NOLINT diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index 173dd899ada..9d383aa8245 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -48,12 +48,13 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); const String path = hdfs_uri.substr(begin_of_path); - fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); if (fout == nullptr) { - throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} error: {}", - path, std::string(hdfsGetLastError())); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} ({}) error: {}", + path, hdfs_uri, std::string(hdfsGetLastError())); } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 2f2427edb24..a64faafd53d 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -22,13 +22,14 @@ StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguratio void StorageHDFSConfiguration::check(ContextPtr context) const { context->getRemoteHostFilter().checkURL(Poco::URI(url)); - checkHDFSURL(url); + checkHDFSURL(fs::path(url) / path); } ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { UNUSED(is_readonly); auto settings = std::make_unique(); + chassert(!url.empty()); return std::make_shared(url, std::move(settings), context->getConfigRef()); } @@ -36,15 +37,20 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr, bool /* with_str { url = checkAndGetLiteralArgument(args[0], "url"); - String format_name = "auto"; if (args.size() > 1) - format_name = checkAndGetLiteralArgument(args[1], "format_name"); + format = checkAndGetLiteralArgument(args[1], "format_name"); + else + format = "auto"; - String compression_method; if (args.size() == 3) compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); else compression_method = "auto"; + + const size_t begin_of_path = url.find('/', url.find("//") + 2); + path = url.substr(begin_of_path + 1); + url = url.substr(0, begin_of_path); + paths = {path}; } } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp 
index a0e719878ac..dd4bfe79b06 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -195,19 +195,19 @@ ReadBufferIterator::Data ReadBufferIterator::next() throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in S3 or all files are empty. You can specify table structure manually", - *format); + "in {} or all files are empty. You can specify table structure manually", + *format, object_storage->getName()); throw Exception( ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in S3 or all files are empty. You can specify the format manually"); + "in {} or all files are empty. You can specify the format manually", object_storage->getName()); } return {nullptr, std::nullopt, format}; } - /// S3 file iterator could get new keys after new iteration + /// file iterator could get new keys after new iteration if (read_keys.size() > prev_read_keys_size) { /// If format is unknown we can try to determine it by new file names. @@ -234,7 +234,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() prev_read_keys_size = read_keys.size(); } - if (getContext()->getSettingsRef().s3_skip_empty_files + if (query_settings.skip_empty_files && current_object_info->metadata && current_object_info->metadata->size_bytes == 0) continue; @@ -255,7 +255,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() {}, current_object_info->metadata->size_bytes); - if (!getContext()->getSettingsRef().s3_skip_empty_files || !read_buffer->eof()) + if (!query_settings.skip_empty_files || !read_buffer->eof()) { first = false; From f23ddec69f51481b8a7c3b923ae5e9dbb3891b41 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Feb 2024 11:50:36 +0100 Subject: [PATCH 034/651] Fix unit tests build --- src/IO/tests/gtest_writebuffer_s3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index ae00bb2e9e2..7856f22ab1a 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -546,7 +546,7 @@ public: std::unique_ptr getWriteBuffer(String file_name = "file") { S3Settings::RequestSettings request_settings; - request_settings.updateFromSettings(settings); + request_settings.updateFromSettingsIfChanged(settings); client->resetCounters(); From 26a2fcf65a1702f71cc8cb6167d5622d55c00ae6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 22 Feb 2024 12:46:13 +0100 Subject: [PATCH 035/651] Fix style-check --- src/DataTypes/Serializations/SerializationBool.cpp | 1 - src/DataTypes/Serializations/SerializationNullable.cpp | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index 0cf9cb8be49..94bc724fd5d 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -242,7 +242,6 @@ void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & is { if (istr.eof()) throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF."); - if (settings.tsv.crlf_end_of_line_input) deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || 
*buf.position() == '\r'; });
     else
diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp
index 9e78b1285db..bb6adf77b32 100644
--- a/src/DataTypes/Serializations/SerializationNullable.cpp
+++ b/src/DataTypes/Serializations/SerializationNullable.cpp
@@ -389,8 +389,7 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr
     if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos)
         throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
-                            "containing '\\t' or '\\n' may not work correctly for large input.");
-
+                            "containing '\\t' or '\\n' may not work correctly for large input.");
     if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos)
         throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
                             "containing '\\r' may not work correctly for large input.");

From 7f452aa830501ec4d800866b69fab7d158d4896c Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Thu, 22 Feb 2024 13:41:03 +0100
Subject: [PATCH 036/651] Update SerializationNullable.cpp

Fix style-check

---
 src/DataTypes/Serializations/SerializationNullable.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp
index bb6adf77b32..566221e2371 100644
--- a/src/DataTypes/Serializations/SerializationNullable.cpp
+++ b/src/DataTypes/Serializations/SerializationNullable.cpp
@@ -389,11 +389,11 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr
     if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos)
         throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
-                             "containing '\\t' or '\\n' may not work correctly for large input.");
+                            "containing '\\t' or '\\n' may not work correctly for large input.");
     if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos)
         throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
-                            "containing '\\r' may not work correctly for large input.");
-
+                            "containing '\\r' may not work correctly for large input.");
+
     WriteBufferFromOwnString parsed_value;
     if constexpr (escaped)
         nested_serialization->serializeTextEscaped(nested_column, nested_column.size() - 1, parsed_value, settings);

From b548ed976d11309f8fb3b643ab71d9fd7d26ab31 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Thu, 22 Feb 2024 14:45:29 +0100
Subject: [PATCH 037/651] Fix

---
 src/Storages/ObjectStorage/StorageObjectStorageCluster.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
index 5d77d4ced60..d7940851b00 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
@@ -54,7 +54,7 @@ private:
         const StorageSnapshotPtr & storage_snapshot,
         const ContextPtr & context) override;

-    const String & engine_name;
+    const String engine_name;
     const Storage::ConfigurationPtr configuration;
     const ObjectStoragePtr object_storage;
     NamesAndTypesList virtual_columns;

From e78ab3e06377502068830bfe27f69777f3497cdd Mon Sep 17 00:00:00 2001
From: Shaun Struwig 
<41984034+Blargian@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:20:54 +0100 Subject: [PATCH 038/651] Update src/DataTypes/Serializations/SerializationNullable.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- .../Serializations/SerializationNullable.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 566221e2371..5aca15e46f0 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -350,16 +350,8 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr buf.setCheckpoint(); SCOPE_EXIT(buf.dropCheckpoint()); - if (settings.tsv.crlf_end_of_line_input) - { - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == '\r')) - return true; - } - else - { - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n')) - return true; - } + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || (settings.tsv.crlf_end_of_line_input && *buf.position() == '\r'))) + return true; buf.rollbackToCheckpoint(); return false; }; From a458797015eb7d136edf878ac9464c8e6ffdad75 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:21:13 +0100 Subject: [PATCH 039/651] Update src/DataTypes/Serializations/SerializationNullable.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/DataTypes/Serializations/SerializationNullable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 5aca15e46f0..e9acab7a2a3 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -344,7 +344,7 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr /// Use PeekableReadBuffer to make a checkpoint before checking null /// representation and rollback if check was failed. PeekableReadBuffer peekable_buf(istr, true); - auto check_for_null = [&null_representation, settings](ReadBuffer & buf_) + auto check_for_null = [&null_representation, &settings](ReadBuffer & buf_) { auto & buf = assert_cast(buf_); buf.setCheckpoint(); From 03d0dd661feaf19d62a6969fc8d895200e410b38 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:21:19 +0100 Subject: [PATCH 040/651] Update src/DataTypes/Serializations/SerializationNullable.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/DataTypes/Serializations/SerializationNullable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index e9acab7a2a3..aef0a814f24 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -319,7 +319,7 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr /// Check if we have enough data in buffer to check if it's a null. 
if (istr.available() > null_representation.size()) { - auto check_for_null = [&null_representation, settings](ReadBuffer & buf) + auto check_for_null = [&null_representation, &settings](ReadBuffer & buf) { auto * pos = buf.position(); if (settings.tsv.crlf_end_of_line_input) From c83179bc70b5363a839d71d6f34af54807ad1d82 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:21:28 +0100 Subject: [PATCH 041/651] Update src/DataTypes/Serializations/SerializationNullable.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- .../Serializations/SerializationNullable.cpp | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index aef0a814f24..c796c147f1f 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -322,20 +322,10 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr auto check_for_null = [&null_representation, &settings](ReadBuffer & buf) { auto * pos = buf.position(); - if (settings.tsv.crlf_end_of_line_input) - { - if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n'|| *buf.position() == '\r')) - return true; - buf.position() = pos; - return false; - } - else - { - if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n')) - return true; - buf.position() = pos; - return false; - } + if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n' || (settings.tsv.crlf_end_of_line_input && *buf.position() == '\r'))) + return true; + buf.position() = pos; + return false; }; return deserializeImpl(column, istr, check_for_null, deserialize_nested, is_null); } From 230cc512f86ede5e989a8a41a8abaaf15cfaebdd Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 27 Feb 2024 20:40:55 +0100 Subject: [PATCH 042/651] Handle CRLF in TabSeparatedRowInputFormat --- .../Formats/Impl/TabSeparatedRowInputFormat.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 93982526ddc..f60a64b18e0 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -164,7 +164,7 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t const SerializationPtr & serialization, bool is_last_file_column, const String & /*column_name*/) { const bool at_delimiter = !is_last_file_column && !buf->eof() && *buf->position() == '\t'; - const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n'); + const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || (format_settings.tsv.crlf_end_of_line_input && *buf->position() == '\r')); if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) { @@ -229,7 +229,10 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) try { - assertChar('\n', *buf); + if (!format_settings.tsv.crlf_end_of_line_input) + assertChar('\n', *buf); + else + assertChar('\r', *buf); } catch (const DB::Exception &) { @@ -242,7 +245,10 @@ bool 
TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) else if (*buf->position() == '\r') { out << "ERROR: Carriage return found where line feed is expected." - " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; + " It's like your file has DOS/Windows style line separators. \n" + "You must transform your file to Unix format. \n" + "But if you really need carriage return at end of string value of last column, you need to escape it as \\r \n" + "or else enable setting 'input_format_tsv_crlf_end_of_line'"; } else { @@ -357,7 +363,7 @@ void TabSeparatedFormatReader::skipRow() bool TabSeparatedFormatReader::checkForEndOfRow() { - return buf->eof() || *buf->position() == '\n'; + return buf->eof() || *buf->position() == '\n' || (format_settings.tsv.crlf_end_of_line_input && *buf->position() == '\r'); } TabSeparatedSchemaReader::TabSeparatedSchemaReader( From e77d1076b8bc0a5163a07966d26010fb4461ffe4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Feb 2024 23:03:22 +0100 Subject: [PATCH 043/651] Fix crash in libunwind while interpreting debug info --- contrib/libunwind | 2 +- src/Common/examples/CMakeLists.txt | 3 ++ src/Common/examples/check_pointer_valid.cpp | 52 +++++++++++++++++++++ src/Daemon/BaseDaemon.cpp | 28 +++++++++++ 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 src/Common/examples/check_pointer_valid.cpp diff --git a/contrib/libunwind b/contrib/libunwind index 40d8eadf96b..e4e1c06f4bc 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 40d8eadf96b127d9b22d53ce7a4fc52aaedea965 +Subproject commit e4e1c06f4bc31adb3fa20e517779171b4b521c78 diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 90a238c9800..73e1396fb35 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -87,3 +87,6 @@ if (ENABLE_SSL) clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp) target_link_libraries (encrypt_decrypt PRIVATE dbms) endif() + +clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) +target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io) diff --git a/src/Common/examples/check_pointer_valid.cpp b/src/Common/examples/check_pointer_valid.cpp new file mode 100644 index 00000000000..4baf8f2f01d --- /dev/null +++ b/src/Common/examples/check_pointer_valid.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include + + +/// This example demonstrates how is it possible to check if a pointer to memory is readable using a signal handler. 
+ +thread_local bool checking_pointer = false; +thread_local jmp_buf signal_jump_buffer; + + +void signalHandler(int sig, siginfo_t *, void *) +{ + if (checking_pointer && sig == SIGSEGV) + siglongjmp(signal_jump_buffer, 1); +} + +bool isPointerValid(const void * ptr) +{ + checking_pointer = true; + if (0 == sigsetjmp(signal_jump_buffer, 1)) + { + char res; + memcpy(&res, ptr, 1); + __asm__ __volatile__("" :: "r"(res) : "memory"); + return true; + } + else + { + checking_pointer = false; + return false; + } +} + +int main(int, char **) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = signalHandler; + sa.sa_flags = SA_SIGINFO; + + if (sigemptyset(&sa.sa_mask) + || sigaddset(&sa.sa_mask, SIGSEGV) + || sigaction(SIGSEGV, &sa, nullptr)) + return 1; + + std::cerr << isPointerValid(reinterpret_cast(0x123456789)) << "\n"; + std::cerr << isPointerValid(&sa) << "\n"; + + return 0; +} diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 289a41bb75e..0e781baf0d4 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -137,10 +137,18 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) static std::atomic_flag fatal_error_printed; +/// Special handling for a function isPointerReadable. +thread_local bool checking_pointer = false; +thread_local jmp_buf signal_jump_buffer; + /** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log. */ static void signalHandler(int sig, siginfo_t * info, void * context) { + /// Special handling for a function isPointerReadable. + if (checking_pointer && sig == SIGSEGV) + siglongjmp(signal_jump_buffer, 1); + DENY_ALLOCATIONS_IN_SCOPE; auto saved_errno = errno; /// We must restore previous value of errno in signal handler. @@ -182,6 +190,26 @@ static void signalHandler(int sig, siginfo_t * info, void * context) errno = saved_errno; } +/// This function can be used from other translation units, +/// For example, from libunwind while parsing debug info, which is unsafe. +/// Note: we are checking only the first byte, which is ok for aligned words. 
+extern "C" bool isPointerReadable(const void * ptr) +{ + checking_pointer = true; + if (0 == sigsetjmp(signal_jump_buffer, 1)) + { + char res; + memcpy(&res, ptr, 1); + __asm__ __volatile__("" :: "r"(res) : "memory"); + return true; + } + else + { + checking_pointer = false; + return false; + } +} + static bool getenvBool(const char * name) { bool res = false; From 676519ce90a5cf3e39415885ad72c09d5202b09a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Feb 2024 23:06:47 +0100 Subject: [PATCH 044/651] Fix crash in libunwind while interpreting debug info --- src/Common/examples/check_pointer_valid.cpp | 1 + src/Daemon/BaseDaemon.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Common/examples/check_pointer_valid.cpp b/src/Common/examples/check_pointer_valid.cpp index 4baf8f2f01d..a6b6ee7027d 100644 --- a/src/Common/examples/check_pointer_valid.cpp +++ b/src/Common/examples/check_pointer_valid.cpp @@ -24,6 +24,7 @@ bool isPointerValid(const void * ptr) char res; memcpy(&res, ptr, 1); __asm__ __volatile__("" :: "r"(res) : "memory"); + checking_pointer = false; return true; } else diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 0e781baf0d4..2ca51ae72e0 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -201,6 +201,7 @@ extern "C" bool isPointerReadable(const void * ptr) char res; memcpy(&res, ptr, 1); __asm__ __volatile__("" :: "r"(res) : "memory"); + checking_pointer = false; return true; } else From b1e67c7bc6a45dd06e27b44a56531bca1745268e Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 28 Feb 2024 09:55:16 +0000 Subject: [PATCH 045/651] s/jmp_buf/sigjmp_buf/ --- src/Daemon/BaseDaemon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 2ca51ae72e0..b6ebe80b07d 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -139,7 +139,7 @@ static std::atomic_flag fatal_error_printed; /// Special handling for a function isPointerReadable. thread_local bool checking_pointer = false; -thread_local jmp_buf signal_jump_buffer; +thread_local sigjmp_buf signal_jump_buffer; /** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log. 
*/ From 69bb01e77a15cad1e022b7d8234b61373243070c Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 29 Feb 2024 21:49:27 +0100 Subject: [PATCH 046/651] Fix style-check --- src/DataTypes/Serializations/SerializationNullable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index c796c147f1f..06361e24aa2 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -375,7 +375,7 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " "containing '\\r' may not work correctly for large input."); - + WriteBufferFromOwnString parsed_value; if constexpr (escaped) nested_serialization->serializeTextEscaped(nested_column, nested_column.size() - 1, parsed_value, settings); From a95e7de257455a95855859c7f3b8d64583025027 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 02:44:55 +0100 Subject: [PATCH 047/651] Support for MSan --- src/Daemon/BaseDaemon.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index b6ebe80b07d..b7e0f765799 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -200,6 +200,7 @@ extern "C" bool isPointerReadable(const void * ptr) { char res; memcpy(&res, ptr, 1); + __msan_unpoison(res, 1); __asm__ __volatile__("" :: "r"(res) : "memory"); checking_pointer = false; return true; From a67d862bd02dfea8be4fad1bcc984fe6194c021d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 02:47:15 +0100 Subject: [PATCH 048/651] Support for sanitizers --- src/Daemon/BaseDaemon.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index b7e0f765799..c167ca1baa0 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -190,6 +190,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context) errno = saved_errno; } +#if !defined(SANITIZER) /// This function can be used from other translation units, /// For example, from libunwind while parsing debug info, which is unsafe. /// Note: we are checking only the first byte, which is ok for aligned words. 
@@ -200,7 +201,6 @@ extern "C" bool isPointerReadable(const void * ptr) { char res; memcpy(&res, ptr, 1); - __msan_unpoison(res, 1); __asm__ __volatile__("" :: "r"(res) : "memory"); checking_pointer = false; return true; @@ -211,6 +211,7 @@ extern "C" bool isPointerReadable(const void * ptr) return false; } } +#endif static bool getenvBool(const char * name) { From 1899a3062e9f60e8dc07984fb10113c7dcc2e72c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Mar 2024 02:48:31 +0100 Subject: [PATCH 049/651] Clang-tidy --- src/Common/examples/check_pointer_valid.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/examples/check_pointer_valid.cpp b/src/Common/examples/check_pointer_valid.cpp index a6b6ee7027d..e59ebf43327 100644 --- a/src/Common/examples/check_pointer_valid.cpp +++ b/src/Common/examples/check_pointer_valid.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include From db71a6fa7777dbf4ec8f1b67862edbbbc992ab90 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 5 Mar 2024 17:48:48 +0000 Subject: [PATCH 050/651] Fix race in refreshable materialized views causing SELECT to fail sometimes --- src/Storages/StorageMaterializedView.cpp | 9 +++++++-- src/Storages/StorageMaterializedView.h | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 1d0898a2f11..b8361109cb2 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -153,6 +153,7 @@ StorageMaterializedView::StorageMaterializedView( if (query.refresh_strategy) { + fixed_uuid = query.refresh_strategy->append; refresher = RefreshTask::create( *this, getContext(), @@ -629,10 +630,14 @@ void StorageMaterializedView::onActionLockRemove(StorageActionBlockType action_t refresher->start(); } -DB::StorageID StorageMaterializedView::getTargetTableId() const +StorageID StorageMaterializedView::getTargetTableId() const { std::lock_guard guard(target_table_id_mutex); - return target_table_id; + auto id = target_table_id; + /// TODO: Avoid putting uuid into target_table_id in the first place, instead of clearing it here. + if (!fixed_uuid) + id.uuid = UUIDHelpers::Nil; + return id; } void StorageMaterializedView::setTargetTableId(DB::StorageID id) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 4d574a821ec..4d070f4e40d 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -111,6 +111,10 @@ private: bool has_inner_table = false; + /// If false, inner table is replaced on each refresh. In that case, target_table_id doesn't + /// have UUID, and we do inner table lookup by name instead. 
+ bool fixed_uuid = true; + friend class RefreshTask; void checkStatementCanBeForwarded() const; From 2ad8ab2a5719bbaeb8a1c3216cd93b760534c59a Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 5 Mar 2024 19:12:49 +0100 Subject: [PATCH 051/651] Fix linker errors --- src/Formats/EscapingRuleUtils.cpp | 2 +- src/IO/ReadHelpers.h | 2 +- src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 6b254102bdf..c7a6cb18625 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -236,7 +236,7 @@ String readByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escapin if constexpr (read_string) readEscapedString(result, buf); else - readTSVField(result, buf); + readTSVField(result, buf); break; default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read value with {} escaping rule", escapingRuleToString(escaping_rule)); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 3a20d2480b8..f8e5887b82b 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1897,8 +1897,8 @@ bool tryReadQuotedField(String & s, ReadBuffer & buf); void readJSONField(String & s, ReadBuffer & buf); bool tryReadJSONField(String & s, ReadBuffer & buf); -template void readTSVField(String & s, ReadBuffer & buf); +void readTSVFieldCRLF(String & s, ReadBuffer & buf); /** Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters). * It is assumed that the cursor is located on the `\` symbol diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index f60a64b18e0..a800bf41ac9 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -121,7 +121,7 @@ String TabSeparatedFormatReader::readFieldIntoString() if constexpr (read_string) support_crlf ? readEscapedStringCRLF(field, *buf) : readEscapedString(field, *buf); else - support_crlf ? readTSVField(field, *buf) : readTSVField(field, *buf); + support_crlf ? 
readTSVFieldCRLF(field, *buf) : readTSVField(field, *buf); } return field; } From 34e0fbd83ee0488584bbd75e670dd585095f4e55 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 5 Mar 2024 20:18:40 +0000 Subject: [PATCH 052/651] Oops, append mode doesn't exist yet --- src/Storages/StorageMaterializedView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index b8361109cb2..2a6e5cf2e03 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -153,7 +153,7 @@ StorageMaterializedView::StorageMaterializedView( if (query.refresh_strategy) { - fixed_uuid = query.refresh_strategy->append; + fixed_uuid = false; refresher = RefreshTask::create( *this, getContext(), From 2939ea07c8192aa9ed3bd8c75fe9ea42ded0a9cf Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 04:31:49 +0100 Subject: [PATCH 053/651] Update 02973_parse_crlf_with_tsv_files.sh Fix Fuzzer failing on parallel file creation/deletion issue --- .../0_stateless/02973_parse_crlf_with_tsv_files.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index cb7472be418..df03da4d42b 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -6,8 +6,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation step USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/data_without_crlf.tsv -DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/data_with_crlf.tsv +FILE_NAME_UNIX = "${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +FILE_NAME_CRLF = "${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_UNIX +DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_CRLF touch $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_DOS_ENDINGS @@ -16,11 +18,11 @@ echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t3 echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r\n" > $DATA_FILE_DOS_ENDINGS echo -e "<-- Read UNIX endings -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(data_without_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" -$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(data_with_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${FILE_NAME_UNIX}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" +$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${FILE_NAME_CRLF}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(data_with_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${FILE_NAME_CRLF}, 'TabSeparated', 'SearchTerm String, Date Date, 
Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" # Test teardown rm $DATA_FILE_UNIX_ENDINGS From 7fa7d81c92007a8e6e8f4be2c3e348a20c3f3cd2 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 06:03:24 +0100 Subject: [PATCH 054/651] Fix shell style check Remove space around = --- tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index df03da4d42b..345a01bab88 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -6,8 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation step USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME_UNIX = "${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" -FILE_NAME_CRLF = "${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +FILE_NAME_UNIX="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +FILE_NAME_CRLF="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_UNIX DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_CRLF From 2f2139d53b4497e7fc192d53a3474392dac5ad00 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 07:58:27 +0100 Subject: [PATCH 055/651] Update 02973_parse_crlf_with_tsv_files.sh --- .../queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 345a01bab88..c36d65fa617 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -18,11 +18,11 @@ echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t3 echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r\n" > $DATA_FILE_DOS_ENDINGS echo -e "<-- Read UNIX endings -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${FILE_NAME_UNIX}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" -$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${FILE_NAME_CRLF}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DATA_FILE_UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" +$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DATA_FILE_DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${FILE_NAME_CRLF}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DATA_FILE_DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" # Test teardown rm $DATA_FILE_UNIX_ENDINGS From 
37a17172ccbe4c0f3aeee145ba569ef109ad9efd Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 08:18:13 +0100 Subject: [PATCH 056/651] Update 02973_parse_crlf_with_tsv_files.sh --- .../queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index c36d65fa617..c521b936140 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -6,10 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation step USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME_UNIX="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" -FILE_NAME_CRLF="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" -DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_UNIX -DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_CRLF +DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" touch $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_DOS_ENDINGS From 361b23c007e1099a9dea11d26c019b9b1b3fb251 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 6 Mar 2024 18:14:58 +0100 Subject: [PATCH 057/651] Use unique test name for parallel tests --- .../0_stateless/02973_parse_crlf_with_tsv_files.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index c521b936140..c8a3d854d5a 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -6,9 +6,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation step USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" -DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +UNIX_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" +DATA_FILE_DOS_ENDINGS="${USER_FILES_PATH:?}/${DOS_ENDINGS}" +echo $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_DOS_ENDINGS @@ -16,11 +19,11 @@ echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t3 echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r\n" > $DATA_FILE_DOS_ENDINGS echo -e "<-- Read UNIX endings -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DATA_FILE_UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" -$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DATA_FILE_DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" +$CLICKHOUSE_CLIENT --query "SELECT * FROM 
file(${UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" +$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DATA_FILE_DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" # Test teardown rm $DATA_FILE_UNIX_ENDINGS From 0abed7aab1ed5d4aa160b03d64bb5846b5a982dc Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 6 Mar 2024 22:10:24 +0100 Subject: [PATCH 058/651] Update reference file --- .../0_stateless/02973_parse_crlf_with_tsv_files.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference index 88d203bd723..14cf3a564e4 100644 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference @@ -1,3 +1,4 @@ +/home/shaun/Desktop/ClickHouse/user_files/02973_parse_crlf_with_tsv_files_test_data_without_crlf.tsv <-- Read UNIX endings --> Akiba_Hebrew_Academy 2017-08-01 241 From 188fe4a93afa8db75afc9b75e6450424d1c4d542 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 6 Mar 2024 22:11:47 +0100 Subject: [PATCH 059/651] Update reference file --- .../0_stateless/02973_parse_crlf_with_tsv_files.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference index 14cf3a564e4..88d203bd723 100644 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference @@ -1,4 +1,3 @@ -/home/shaun/Desktop/ClickHouse/user_files/02973_parse_crlf_with_tsv_files_test_data_without_crlf.tsv <-- Read UNIX endings --> Akiba_Hebrew_Academy 2017-08-01 241 From bb393890e8b85f33f7e08236d2bcc61029c5c449 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 8 Mar 2024 20:05:26 +0100 Subject: [PATCH 060/651] Test passes locally --- .../0.2973_parse_crlf_with_tsv_files.reference | 12 ++++++++++++ .../0_stateless/02973_parse_crlf_with_tsv_files.sh | 1 - 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference diff --git a/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference b/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference new file mode 100644 index 00000000000..14cf3a564e4 --- /dev/null +++ b/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference @@ -0,0 +1,12 @@ +/home/shaun/Desktop/ClickHouse/user_files/02973_parse_crlf_with_tsv_files_test_data_without_crlf.tsv +<-- Read UNIX endings --> + +Akiba_Hebrew_Academy 2017-08-01 241 +Aegithina_tiphia 2018-02-01 34 +1971-72_Utah_Stars_season 2016-10-01 1 + +<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 --> + +Akiba_Hebrew_Academy 2017-08-01 241 +Aegithina_tiphia 2018-02-01 34 +1971-72_Utah_Stars_season 2016-10-01 1 diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh 
b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index c8a3d854d5a..14f28f1ba4a 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -11,7 +11,6 @@ DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" DATA_FILE_DOS_ENDINGS="${USER_FILES_PATH:?}/${DOS_ENDINGS}" -echo $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_DOS_ENDINGS From 784b656c6a8d19cacc184ff923bbff4d27882c5b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 11 Mar 2024 00:44:36 +0100 Subject: [PATCH 061/651] Another approach --- contrib/libunwind | 2 +- src/Common/ProfileEvents.cpp | 1 + src/Common/QueryProfiler.cpp | 26 ++++++++++++++++++++++---- src/Common/StackTrace.cpp | 4 ++++ src/Common/StackTrace.h | 6 ++++++ src/Daemon/BaseDaemon.cpp | 31 ++----------------------------- 6 files changed, 36 insertions(+), 34 deletions(-) diff --git a/contrib/libunwind b/contrib/libunwind index e4e1c06f4bc..5c221df716e 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit e4e1c06f4bc31adb3fa20e517779171b4b521c78 +Subproject commit 5c221df716eaad6b5637bfdae593296db18d23a0 diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index c1ac3d08245..4b66a920abb 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -361,6 +361,7 @@ The server successfully detected this situation and will download merged part fr M(QueryProfilerSignalOverruns, "Number of times we drop processing of a query profiler signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \ M(QueryProfilerConcurrencyOverruns, "Number of times we drop processing of a query profiler signal due to too many concurrent query profilers in other threads, which may indicate overload.") \ M(QueryProfilerRuns, "Number of times QueryProfiler had been run.") \ + M(QueryProfilerErrors, "Invalid memory accesses during asynchronous stack unwinding.") \ \ M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \ M(NotCreatedLogEntryForMerge, "Log entry to merge parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \ diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 34ffbf6c498..5ec784527f7 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -11,7 +11,6 @@ #include #include -#include namespace CurrentMetrics { @@ -24,6 +23,7 @@ namespace ProfileEvents extern const Event QueryProfilerSignalOverruns; extern const Event QueryProfilerConcurrencyOverruns; extern const Event QueryProfilerRuns; + extern const Event QueryProfilerErrors; } namespace DB @@ -83,11 +83,29 @@ namespace #endif const auto signal_context = *reinterpret_cast(context); - const StackTrace stack_trace(signal_context); + std::optional stack_trace; + +#if defined(SANITIZER) + constexpr bool sanitizer = true; +#else + constexpr bool sanitizer = false; +#endif + + asynchronous_stack_unwinding = true; + if (sanitizer || 0 == sigsetjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1)) + { + stack_trace.emplace(signal_context); + } + else + { + ProfileEvents::incrementNoTrace(ProfileEvents::QueryProfilerErrors); + } + asynchronous_stack_unwinding = false; + + if (stack_trace) + TraceSender::send(trace_type, *stack_trace, {}); - TraceSender::send(trace_type, stack_trace, {}); 
ProfileEvents::incrementNoTrace(ProfileEvents::QueryProfilerRuns); - errno = saved_errno; } diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 436b85ff30b..538aad5a79a 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -532,3 +532,7 @@ void StackTrace::dropCache() std::lock_guard lock{stacktrace_cache_mutex}; cacheInstance().clear(); } + + +thread_local bool asynchronous_stack_unwinding = false; +thread_local sigjmp_buf asynchronous_stack_unwinding_signal_jump_buffer; diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index a16d889a67a..1d72b0239a6 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef OS_DARWIN // ucontext is not available without _XOPEN_SOURCE @@ -87,3 +88,8 @@ protected: }; std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context); + +/// Special handling for errors during asynchronous stack unwinding, +/// Which is used in Query Profiler +extern thread_local bool asynchronous_stack_unwinding; +extern thread_local sigjmp_buf asynchronous_stack_unwinding_signal_jump_buffer; diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index c167ca1baa0..3b030c918e7 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -137,17 +137,12 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) static std::atomic_flag fatal_error_printed; -/// Special handling for a function isPointerReadable. -thread_local bool checking_pointer = false; -thread_local sigjmp_buf signal_jump_buffer; - /** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log. */ static void signalHandler(int sig, siginfo_t * info, void * context) { - /// Special handling for a function isPointerReadable. - if (checking_pointer && sig == SIGSEGV) - siglongjmp(signal_jump_buffer, 1); + if (asynchronous_stack_unwinding && sig == SIGSEGV) + siglongjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1); DENY_ALLOCATIONS_IN_SCOPE; auto saved_errno = errno; /// We must restore previous value of errno in signal handler. @@ -190,28 +185,6 @@ static void signalHandler(int sig, siginfo_t * info, void * context) errno = saved_errno; } -#if !defined(SANITIZER) -/// This function can be used from other translation units, -/// For example, from libunwind while parsing debug info, which is unsafe. -/// Note: we are checking only the first byte, which is ok for aligned words. 
-extern "C" bool isPointerReadable(const void * ptr) -{ - checking_pointer = true; - if (0 == sigsetjmp(signal_jump_buffer, 1)) - { - char res; - memcpy(&res, ptr, 1); - __asm__ __volatile__("" :: "r"(res) : "memory"); - checking_pointer = false; - return true; - } - else - { - checking_pointer = false; - return false; - } -} -#endif static bool getenvBool(const char * name) { From 92ebf3d5becdf7af2bc17e3afb479fd456f008c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 09:22:14 +0100 Subject: [PATCH 062/651] Loosen --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b55e9810361..9ffb4789dc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,8 +61,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS) # set CPU time limit to 1000 seconds set (RLIMIT_CPU 1000) - # -fsanitize=memory is too heavy - if (SANITIZE STREQUAL "memory") + # -fsanitize=memory and address are too heavy + if (SANITIZE) set (RLIMIT_DATA 10000000000) # 10G endif() From 70272d41744d9cc219d79c6dd5e3b6c9e523d447 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Mar 2024 10:55:01 +0100 Subject: [PATCH 063/651] Minor --- src/CMakeLists.txt | 2 +- src/Databases/DatabaseHDFS.cpp | 2 +- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 6 +- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 2 +- .../ObjectStorages/ObjectStorageFactory.cpp | 2 +- src/IO/examples/read_buffer_from_hdfs.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 1200 ----------------- src/Storages/Hive/HiveCommon.h | 2 +- src/Storages/Hive/HiveFile.h | 2 +- src/Storages/Hive/StorageHive.cpp | 4 +- src/Storages/Hive/StorageHive.h | 2 +- .../HDFS/AsynchronousReadBufferFromHDFS.cpp | 2 +- .../HDFS/AsynchronousReadBufferFromHDFS.h | 2 +- .../ObjectStorage/HDFS/Configuration.cpp | 2 +- .../{ => ObjectStorage}/HDFS/HDFSCommon.cpp | 2 +- .../{ => ObjectStorage}/HDFS/HDFSCommon.h | 0 .../HDFS/ReadBufferFromHDFS.cpp | 2 +- .../HDFS/ReadBufferFromHDFS.h | 0 .../HDFS/WriteBufferFromHDFS.cpp | 4 +- .../HDFS/WriteBufferFromHDFS.h | 0 .../examples/async_read_buffer_from_hdfs.cpp | 2 +- 21 files changed, 21 insertions(+), 1221 deletions(-) delete mode 100644 src/Storages/HDFS/StorageHDFS.cpp rename src/Storages/{ => ObjectStorage}/HDFS/AsynchronousReadBufferFromHDFS.cpp (99%) rename src/Storages/{ => ObjectStorage}/HDFS/AsynchronousReadBufferFromHDFS.h (96%) rename src/Storages/{ => ObjectStorage}/HDFS/HDFSCommon.cpp (99%) rename src/Storages/{ => ObjectStorage}/HDFS/HDFSCommon.h (100%) rename src/Storages/{ => ObjectStorage}/HDFS/ReadBufferFromHDFS.cpp (99%) rename src/Storages/{ => ObjectStorage}/HDFS/ReadBufferFromHDFS.h (100%) rename src/Storages/{ => ObjectStorage}/HDFS/WriteBufferFromHDFS.cpp (97%) rename src/Storages/{ => ObjectStorage}/HDFS/WriteBufferFromHDFS.h (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1cf0e4e2b98..3cb64b56c46 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -150,7 +150,7 @@ if (TARGET ch_contrib::azure_sdk) endif() if (TARGET ch_contrib::hdfs) - add_headers_and_sources(dbms Storages/HDFS) + add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) add_headers_and_sources(dbms Disks/ObjectStorages/HDFS) endif() diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 3a1e6b16ccf..cda38a69c9a 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 8bff687b915..2d03de60c3c 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -1,10 +1,10 @@ #include #include -#include -#include +#include +#include -#include +#include #include #include diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 66095eb9f8f..4072d21ed7c 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 02b6816d673..d1841c92a6b 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -7,7 +7,7 @@ #endif #if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include -#include +#include #endif #if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp index 977dd2ae227..a5cf43b3e79 100644 --- a/src/IO/examples/read_buffer_from_hdfs.cpp +++ b/src/IO/examples/read_buffer_from_hdfs.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index cd935fa3100..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1200 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_DETECT_FORMAT; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. 
- */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. - throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. 
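[Editor's aside on the listing logic above: makeRegexpPatternFromGlobs() is used as a black box. The following simplified stand-in is an assumption about its semantics, not the ClickHouse implementation, which also escapes all regex metacharacters and treats ',' specially only inside braces; it shows the shape of the glob-to-RE2 translation.]

#include <string>

std::string globToRegexp(const std::string & glob)
{
    std::string re;
    for (char c : glob)
    {
        switch (c)
        {
            case '*': re += "[^/]*"; break; /// '*' never crosses a path separator
            case '?': re += "[^/]"; break;
            case '{': re += '('; break;     /// {a,b} becomes the alternation (a|b)
            case '}': re += ')'; break;
            case ',': re += '|'; break;
            case '.': re += "\\."; break;
            default: re += c;
        }
    }
    return re;
}

/// e.g. globToRegexp("data_{a,b}*.csv") yields data_(a|b)[^/]*\.csv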
- std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - if (format_name != "auto") - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); - else - columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; - - /// We don't allow special columns in HDFS storage. 
- if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - std::optional format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(std::move(format_)) - , compression_method(compression_method_) - { - } - - Data next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns, format}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - { - if (format) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because all files are empty. " - "You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify table structure manually"); - } - return {nullptr, std::nullopt, format}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns, format}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_index > 0 && current_index <= paths_with_info.size()); - auto path_with_info = paths_with_info[current_index - 1]; - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, 
static_cast(zstd_window_log_max)); - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - auto context = getContext(); - - if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(context); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - if (format) - { - auto cache_key = getKeyForSchemaCache(url, *format, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - std::optional format; - const String & compression_method; - size_t current_index = 0; - }; -} - -std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." - " You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." 
- " You can specify the format manually"); - } - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - if (format) - return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); -} - -std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); -} - -ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. 
- while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; - StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? 
std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - const ContextPtr & context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromHDFS( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo info; - const bool need_only_count; - std::shared_ptr storage; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - column_names, - query_info, - 
storage_snapshot, - context_, - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->virtual_columns, context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->virtual_columns, context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.back(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context_->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - bool truncate_on_insert = context_->getSettingsRef().hdfs_truncate_on_insert; - if (!truncate_on_insert && !hdfsExists(fs.get(), path_from_uri.c_str())) - { - if (context_->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uris[0].find_first_of('.', uris[0].find_last_of('/')); - size_t index = uris.size(); - String new_uri; - do - { - new_uri = uris[0].substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? 
"" : uris[0].substr(pos)); - ++index; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - uris.push_back(new_uri); - current_uri = new_uri; - } - else - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -NamesAndTypesList StorageHDFS::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageHDFS::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git 
a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index 0f9d3364ffd..81c167165d3 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 1f5e31f1d54..affb72fe09b 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace orc { diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 183a4532281..a76cef2d45d 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -38,8 +38,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 07440097f7a..43a22a886a8 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp similarity index 99% rename from src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp index 6b6151f5474..21df7e35284 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -1,9 +1,9 @@ #include "AsynchronousReadBufferFromHDFS.h" #if USE_HDFS +#include "ReadBufferFromHDFS.h" #include #include -#include #include #include diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h similarity index 96% rename from src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h index 10e2749fd4a..5aef92315a4 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index a64faafd53d..6c7fe1cef7e 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -1,7 +1,7 @@ #include #if USE_HDFS -#include +#include #include #include #include diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp similarity index 99% rename from src/Storages/HDFS/HDFSCommon.cpp rename to src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp index f9a55a1285a..5d14cec14bd 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp @@ -1,4 +1,4 @@ -#include +#include "HDFSCommon.h" #include #include #include diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/ObjectStorage/HDFS/HDFSCommon.h similarity index 100% rename from src/Storages/HDFS/HDFSCommon.h rename to src/Storages/ObjectStorage/HDFS/HDFSCommon.h diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp similarity index 99% rename from src/Storages/HDFS/ReadBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index 4df05d47003..18b22805dfc 100644 --- 
a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -1,7 +1,7 @@ #include "ReadBufferFromHDFS.h" #if USE_HDFS -#include +#include "HDFSCommon.h" #include #include #include diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h similarity index 100% rename from src/Storages/HDFS/ReadBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp similarity index 97% rename from src/Storages/HDFS/WriteBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp index 9d383aa8245..2c14b38ce01 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp @@ -2,8 +2,8 @@ #if USE_HDFS -#include -#include +#include "WriteBufferFromHDFS.h" +#include "HDFSCommon.h" #include #include #include diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h similarity index 100% rename from src/Storages/HDFS/WriteBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h diff --git a/src/Storages/examples/async_read_buffer_from_hdfs.cpp b/src/Storages/examples/async_read_buffer_from_hdfs.cpp index 4f6aed8ef65..1c47a07ba58 100644 --- a/src/Storages/examples/async_read_buffer_from_hdfs.cpp +++ b/src/Storages/examples/async_read_buffer_from_hdfs.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include int main() { From cfb73dd30781c95261a02dfb3443f6a18273612b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 18 Mar 2024 13:54:23 +0100 Subject: [PATCH 064/651] Move input_format_tsv_crlf_end_of_line to 24.3 settings changes --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index b14953fd706..5ce98a92003 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -86,6 +86,7 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, @@ -109,7 +110,6 @@ static std::map sett {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, {"format_template_resultset_format", "", "", 
"Template result set format string can be set in query"}, {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, From e019b3a391bb8e3bbfa991e083e65e76438a2a9e Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 25 Mar 2024 16:12:39 +0100 Subject: [PATCH 065/651] Fix build after merge --- src/Backups/BackupIO_AzureBlobStorage.cpp | 2 +- .../IO/WriteBufferFromAzureBlobStorage.h | 2 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 12 +-- .../ObjectStorage/AzureBlob/Configuration.cpp | 73 +++++++++++-------- .../ObjectStorage/AzureBlob/Configuration.h | 2 +- .../DataLakes/DeltaLakeMetadata.cpp | 1 + .../ObjectStorage/HDFS/Configuration.cpp | 8 +- .../ObjectStorage/S3/Configuration.cpp | 2 + .../StorageObjectStorageConfiguration.cpp | 10 +++ .../StorageObjectStorageConfiguration.h | 4 + src/Storages/S3Queue/S3QueueTableMetadata.cpp | 1 - src/Storages/S3Queue/S3QueueTableMetadata.h | 1 + 12 files changed, 77 insertions(+), 41 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index dc4a825189f..8a3ff1c3b5e 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -36,7 +36,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = configuration.createClient(/* is_read_only */ false); + auto client_ptr = configuration.createClient(/* is_read_only */ false, /* attempt_to_create_container */true); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), configuration.createSettings(context_), diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 6e10c07b255..dbf0b2a3052 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace Poco { diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 13d4c2a551b..872f7eec07b 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -69,10 +69,6 @@ std::unique_ptr getClient( { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - if (S3::isS3ExpressEndpoint(endpoint) && !config.has(config_prefix + ".region")) - throw Exception( - ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix); - url = S3::URI(endpoint); if (!url.key.ends_with('/')) url.key.push_back('/'); @@ -83,6 +79,12 @@ std::unique_ptr getClient( throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed"); url = *url_; } + const bool is_s3_express_bucket = S3::isS3ExpressEndpoint(url.endpoint); + if (is_s3_express_bucket && !config.has(config_prefix + ".region")) + { + throw Exception( + ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix); + } S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( auth_settings.region, @@ -130,7 +132,7 @@ std::unique_ptr getClient( .use_virtual_addressing = 
url.is_virtual_hosted_style, .disable_checksum = local_settings.s3_disable_checksum, .gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = S3::isS3ExpressEndpoint(endpoint), + .is_s3express_bucket = is_s3_express_bucket, }; auto credentials_configuration = S3::CredentialsConfiguration diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 7a670441e72..018cec51e7c 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -47,7 +48,8 @@ namespace return !candidate.starts_with("http"); } - bool containerExists(Azure::Storage::Blobs::BlobServiceClient & blob_service_client, std::string container_name) + template + bool containerExists(T & blob_service_client, const std::string & container_name) { Azure::Storage::Blobs::ListBlobContainersOptions options; options.Prefix = container_name; @@ -101,12 +103,13 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { - auto client = createClient(is_readonly); + assertInitialized(); + auto client = createClient(is_readonly, /* attempt_to_create_container */true); auto settings = createSettings(context); return std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), container); } -AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) +AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) { using namespace Azure::Storage::Blobs; @@ -114,28 +117,32 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) if (is_connection_string) { - auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); + std::shared_ptr managed_identity_credential = std::make_shared(); + std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); - bool container_exists = containerExists(*blob_service_client, container); - if (!container_exists) + if (attempt_to_create_container) { - if (is_read_only) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "AzureBlobStorage container does not exist '{}'", - container); + bool container_exists = containerExists(*blob_service_client, container); + if (!container_exists) + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode != Azure::Core::Http::HttpStatusCode::Conflict - || e.ReasonPhrase != "The specified container already exists.") + try { - throw; + result->CreateIfNotExists(); + } + catch (const Azure::Storage::StorageException & e) + { + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) + { + throw; + } } } } @@ -145,22 +152,22 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) std::shared_ptr storage_shared_key_credential; if (account_name.has_value() && 
account_key.has_value()) { - storage_shared_key_credential = - std::make_shared(*account_name, *account_key); + storage_shared_key_credential + = std::make_shared(*account_name, *account_key); } std::unique_ptr blob_service_client; + std::shared_ptr managed_identity_credential; if (storage_shared_key_credential) { blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); } else { - blob_service_client = std::make_unique(connection_url); + managed_identity_credential = std::make_shared(); + blob_service_client = std::make_unique(connection_url, managed_identity_credential); } - bool container_exists = containerExists(*blob_service_client, container); - std::string final_url; size_t pos = connection_url.find('?'); if (pos != std::string::npos) @@ -173,12 +180,21 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) final_url = connection_url + (connection_url.back() == '/' ? "" : "/") + container; + if (!attempt_to_create_container) + { + if (storage_shared_key_credential) + return std::make_unique(final_url, storage_shared_key_credential); + else + return std::make_unique(final_url, managed_identity_credential); + } + + bool container_exists = containerExists(*blob_service_client, container); if (container_exists) { if (storage_shared_key_credential) result = std::make_unique(final_url, storage_shared_key_credential); else - result = std::make_unique(final_url); + result = std::make_unique(final_url, managed_identity_credential); } else { @@ -190,8 +206,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) try { result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); - } - catch (const Azure::Storage::StorageException & e) + } catch (const Azure::Storage::StorageException & e) { if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict && e.ReasonPhrase == "The specified container already exists.") @@ -199,7 +214,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) if (storage_shared_key_credential) result = std::make_unique(final_url, storage_shared_key_credential); else - result = std::make_unique(final_url); + result = std::make_unique(final_url, managed_identity_credential); } else { diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h index 3d701e72cb4..8040d433d99 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -52,7 +52,7 @@ protected: std::string blob_path; std::vector blobs_paths; - AzureClientPtr createClient(bool is_read_only); + AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); }; diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 903558b73ab..1caa2c000d6 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 848fdb292e8..03a0a1a5e69 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -29,12 +29,14 @@ void 
StorageHDFSConfiguration::check(ContextPtr context) const checkHDFSURL(fs::path(url) / path); } -ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT { - UNUSED(is_readonly); - auto settings = std::make_unique(); + assertInitialized(); + if (url.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS url is empty"); + + auto settings = std::make_unique(); return std::make_shared(url, std::move(settings), context->getConfigRef()); } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 0c05f77541b..4e6d8980aa7 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -66,6 +66,8 @@ StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & ot ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT { + assertInitialized(); + const auto & config = context->getConfigRef(); const std::string config_prefix = "s3."; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 8a4dee2c31b..6172f8934af 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -18,6 +18,8 @@ void StorageObjectStorageConfiguration::initialize( // FIXME: it should be - if (format == "auto" && get_format_from_file) if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); + + configuration.initialized = true; } StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other) @@ -48,4 +50,12 @@ std::string StorageObjectStorageConfiguration::getPathWithoutGlob() const return getPath().substr(0, getPath().find_first_of("*?{")); } +void StorageObjectStorageConfiguration::assertInitialized() const +{ + if (!initialized) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration was not initialized before usage"); + } +} + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 8134bd07806..66fe6a68d76 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -54,6 +54,10 @@ public: protected: virtual void fromNamedCollection(const NamedCollection & collection) = 0; virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; + + void assertInitialized() const; + + bool initialized = false; }; using StorageObjectStorageConfigurationPtr = std::shared_ptr; diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index e1978259230..8354e6aa2ae 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -7,7 +7,6 @@ #include #include #include -#include namespace DB diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index a649f211abc..2158b189070 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,6 +3,7 @@ #if
USE_AWS_S3 #include +#include #include #include From f5982fdb1ff30280dfebd89afb9274fca33c56b6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 25 Mar 2024 19:19:54 +0100 Subject: [PATCH 066/651] Fix some tests --- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 16 ++----- .../ObjectStorages/ObjectStorageFactory.cpp | 3 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 - .../ObjectStorage/HDFS/Configuration.cpp | 48 +++++++++++++------ .../ObjectStorage/HDFS/Configuration.h | 6 +-- .../ObjectStorage/ReadBufferIterator.cpp | 4 +- .../ObjectStorage/S3/Configuration.cpp | 6 +++ src/Storages/ObjectStorage/S3/Configuration.h | 2 + .../ObjectStorage/StorageObjectStorage.cpp | 2 + .../ObjectStorage/StorageObjectStorage.h | 2 + .../StorageObjectStorageConfiguration.cpp | 4 ++ .../StorageObjectStorageConfiguration.h | 2 + .../StorageObjectStorageSink.cpp | 40 ++++++++++++++-- .../ObjectStorage/StorageObjectStorageSink.h | 3 ++ src/Storages/StorageS3Settings.cpp | 2 +- .../queries/0_stateless/02114_hdfs_bad_url.sh | 1 - .../0_stateless/02700_s3_part_INT_MAX.sh | 2 +- ...ed_url_and_url_with_special_characters.sql | 3 +- 18 files changed, 104 insertions(+), 44 deletions(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 4072d21ed7c..f92e160fd4d 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -16,21 +16,13 @@ namespace DB struct HDFSObjectStorageSettings { - - HDFSObjectStorageSettings() = default; - - size_t min_bytes_for_seek; - int objects_chunk_size_to_delete; - int replication; - - HDFSObjectStorageSettings( - int min_bytes_for_seek_, - int objects_chunk_size_to_delete_, - int replication_) + HDFSObjectStorageSettings(int min_bytes_for_seek_, int replication_) : min_bytes_for_seek(min_bytes_for_seek_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) , replication(replication_) {} + + size_t min_bytes_for_seek; + int replication; }; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index f30a552f8dd..67e38d6389a 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -227,9 +227,8 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) if (uri.back() != '/') throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); - std::unique_ptr settings = std::make_unique( + auto settings = std::make_unique( config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), context->getSettingsRef().hdfs_replication ); diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 872f7eec07b..1aecb590526 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -143,8 +143,6 @@ std::unique_ptr getClient( auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), }; - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {} - {}", auth_settings.access_key_id, auth_settings.secret_access_key); - return S3::ClientFactory::instance().create( client_configuration, client_settings, diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 03a0a1a5e69..5edc660d717 100644 --- 
a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB @@ -13,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) @@ -29,37 +31,53 @@ void StorageHDFSConfiguration::check(ContextPtr context) const checkHDFSURL(fs::path(url) / path); } -ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT +ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT + ContextPtr context, + bool /* is_readonly */) { assertInitialized(); - - if (url.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS url is empty"); - - auto settings = std::make_unique(); - return std::make_shared(url, std::move(settings), context->getConfigRef()); + const auto & settings = context->getSettingsRef(); + auto hdfs_settings = std::make_unique( + settings.remote_read_min_bytes_for_seek, + settings.hdfs_replication + ); + return std::make_shared(url, std::move(hdfs_settings), context->getConfigRef()); } -void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr, bool /* with_structure */) +void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool /* with_structure */) { url = checkAndGetLiteralArgument(args[0], "url"); if (args.size() > 1) + { + args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); format = checkAndGetLiteralArgument(args[1], "format_name"); - else - format = "auto"; + } if (args.size() == 3) + { + args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); - else - compression_method = "auto"; + } - const size_t begin_of_path = url.find('/', url.find("//") + 2); - path = url.substr(begin_of_path + 1); - url = url.substr(0, begin_of_path); + auto pos = url.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid url: {}", url); + + pos = url.find('/', pos + 2); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid url: {}", url); + + path = url.substr(pos + 1); + url = url.substr(0, pos); paths = {path}; } +void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method fromNamedCollection() is not implemented"); +} + } #endif diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 1013c2e00c2..5765edbf36c 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -29,12 +29,12 @@ public: ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } - void fromNamedCollection(const NamedCollection &) override {} - void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; - static void addStructureToArgs(ASTs &, const String &, ContextPtr) {} private: + void fromNamedCollection(const NamedCollection &) override; + void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; + String url; String path; std::vector paths; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp
b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index dd4bfe79b06..0b6e34fb831 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -31,7 +31,7 @@ ReadBufferIterator::ReadBufferIterator( , query_settings(query_settings_) , schema_cache(schema_cache_) , read_keys(read_keys_) - , format(configuration->format.empty() || configuration->format == "auto" ? std::nullopt : std::optional(configuration->format)) + , format(configuration->format == "auto" ? std::nullopt : std::optional(configuration->format)) , prev_read_keys_size(read_keys_.size()) { } @@ -191,7 +191,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() { if (first) { - if (format) + if (format.has_value()) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "The table structure cannot be extracted from a {} format file, because there are no files with provided path " diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 4e6d8980aa7..132a5045d8a 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -51,10 +51,16 @@ String StorageS3Configuration::getDataSourceDescription() void StorageS3Configuration::check(ContextPtr context) const { + validateNamespace(url.bucket); context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); } +void StorageS3Configuration::validateNamespace(const String & name) const +{ + S3::URI::validateBucket(name, {}); +} + StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) : StorageObjectStorageConfiguration(other) { diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 88a084f29b3..f9614da4b95 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -27,6 +27,8 @@ public: String getDataSourceDescription() override; void check(ContextPtr context) const override; + void validateNamespace(const String & name) const override; + StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } bool isStaticConfiguration() const override { return static_configuration; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index f1d3635514f..3a894af3e01 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -87,6 +87,7 @@ StorageObjectStorage::StorageObjectStorage( , format_settings(format_settings_) , partition_by(partition_by_) , distributed_processing(distributed_processing_) + , log(getLogger("Storage" + engine_name_)) , object_storage(object_storage_) , configuration(configuration_) { @@ -204,6 +205,7 @@ SinkToStoragePtr StorageObjectStorage::write( if (partition_by_ast) { + LOG_TEST(log, "Using PartitionedSink for {}", configuration->getPath()); return std::make_shared( object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 743b725a88a..ebaf504f532 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ 
-113,6 +114,7 @@ protected: const ASTPtr partition_by; const bool distributed_processing; + LoggerPtr log; ObjectStoragePtr object_storage; ConfigurationPtr configuration; std::mutex configuration_update_mutex; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 6172f8934af..9a8b8191907 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -18,7 +19,10 @@ void StorageObjectStorageConfiguration::initialize( // FIXME: it should be - if (format == "auto" && get_format_from_file) if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); + else + FormatFactory::instance().checkFormatName(configuration.format); + configuration.check(local_context); configuration.initialized = true; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 66fe6a68d76..0beed91b128 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -43,6 +43,8 @@ public: std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; + virtual void validateNamespace(const String & /* name */) const {} + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT virtual StorageObjectStorageConfigurationPtr clone() = 0; virtual bool isStaticConfiguration() const { return true; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 37f93a2b82f..2dd8516ebe8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -1,9 +1,14 @@ #include "StorageObjectStorageSink.h" #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_TEXT; +} StorageObjectStorageSink::StorageObjectStorageSink( ObjectStoragePtr object_storage, @@ -93,6 +98,7 @@ void StorageObjectStorageSink::release() write_buf->finalize(); } + PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, @@ -111,9 +117,12 @@ PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String & partition_id) { - auto blob = configuration->getPaths().back(); - auto partition_key = replaceWildcards(blob, partition_id); - validatePartitionKey(partition_key, true); + auto partition_bucket = replaceWildcards(configuration->getNamespace(), partition_id); + validateNamespace(partition_bucket); + + auto partition_key = replaceWildcards(configuration->getPath(), partition_id); + validateKey(partition_key); + return std::make_shared( object_storage, configuration, @@ -124,4 +133,29 @@ SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String ); } +void PartitionedStorageObjectStorageSink::validateKey(const String & str) +{ + /// See: + /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html + /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject + + if 
(str.empty() || str.size() > 1024) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1024 characters), got: {}", str.size()); + + if (!UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); + + validatePartitionKey(str, true); +} + +void PartitionedStorageObjectStorageSink::validateNamespace(const String & str) +{ + configuration->validateNamespace(str); + + if (!UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); + + validatePartitionKey(str, false); +} + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 14298376d0e..a352e2c66a3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -54,6 +54,9 @@ public: SinkPtr createSinkForPartition(const String & partition_id) override; private: + void validateKey(const String & str); + void validateNamespace(const String & str); + ObjectStoragePtr object_storage; StorageObjectStorageConfigurationPtr configuration; const std::optional format_settings; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 3eff6e0f6c9..e8f32388b1b 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -108,7 +108,7 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() if (max_upload_part_size > max_upload_part_size_limit) throw Exception( ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_upload_part_size has invalid value {} which is grater than the s3 API limit {}", + "Setting max_upload_part_size has invalid value {} which is greater than the s3 API limit {}", ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit)); if (max_single_part_upload_size > max_upload_part_size_limit) diff --git a/tests/queries/0_stateless/02114_hdfs_bad_url.sh b/tests/queries/0_stateless/02114_hdfs_bad_url.sh index 22975dddf6f..5bd5610a9f0 100755 --- a/tests/queries/0_stateless/02114_hdfs_bad_url.sh +++ b/tests/queries/0_stateless/02114_hdfs_bad_url.sh @@ -23,4 +23,3 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs1:9000/data', 'CSV', 'x UInt32')" $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "HDFS_ERROR" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('http://hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1@nameservice/abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "HDFS_ERROR" && echo 'OK' || echo 'FAIL'; - diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index d831c7d9806..a34a480a078 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -13,7 +13,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) - SETTINGS s3_max_single_part_upload_size = '10Gi'; + SETTINGS s3_max_single_part_upload_size = '5Gi'; SELECT count() FROM
s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv'); " diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql index da76a5cb88f..1e99eb8b83d 100644 --- a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql +++ b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql @@ -2,5 +2,4 @@ select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/BU%20-%20UNIT%20-%201/*.parquet'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } - +select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_DETECT_FORMAT } From cb97f8dab52aeaf492530d66a8553c422ffbcebd Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 25 Mar 2024 19:22:20 +0100 Subject: [PATCH 067/651] Fix style check --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 1 - .../ObjectStorage/StorageObjectStorageConfiguration.cpp | 4 ++++ src/Storages/ObjectStorage/StorageObjectStorageSink.cpp | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 3a894af3e01..8d85224cff0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -27,7 +27,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int DATABASE_ACCESS_DENIED; extern const int NOT_IMPLEMENTED; - } template diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 9a8b8191907..1d5c0cd3a39 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -4,6 +4,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} void StorageObjectStorageConfiguration::initialize( StorageObjectStorageConfiguration & configuration, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 2dd8516ebe8..cf1c583ca62 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_PARSE_TEXT; + extern const int BAD_ARGUMENTS; } StorageObjectStorageSink::StorageObjectStorageSink( From 7a991de488567a255086a14faa830e1ba1610924 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 27 Mar 2024 19:06:19 +0100 Subject: [PATCH 068/651] Fix tests --- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 68 ++++++++++++++-- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 2 + .../ObjectStorages/ObjectStorageIterator.h | 24 +++--- .../ObjectStorageIteratorAsync.cpp | 12 +++ .../ObjectStorageIteratorAsync.h | 15 ++-- .../ObjectStorages/S3/S3ObjectStorage.cpp | 23 ++++-- .../ObjectStorage/HDFS/Configuration.cpp | 81 +++++++++++++++---- .../ObjectStorage/HDFS/Configuration.h | 3 + .../ObjectStorage/HDFS/ReadBufferFromHDFS.cpp | 17 ++-- .../ReadFromStorageObjectStorage.cpp | 4 +- .../ObjectStorage/StorageObjectStorage.cpp | 4 +- 
.../StorageObjectStorageCluster.cpp | 3 +- .../StorageObjectStorageConfiguration.cpp | 3 +- .../StorageObjectStorageConfiguration.h | 2 +- .../StorageObjectStorageQuerySettings.h | 4 + .../StorageObjectStorageSource.cpp | 56 ++++++++++--- .../StorageObjectStorageSource.h | 31 ++++--- src/Storages/S3Queue/S3QueueSource.cpp | 5 +- src/Storages/S3Queue/S3QueueSource.h | 2 +- src/Storages/S3Queue/StorageS3Queue.cpp | 2 +- tests/integration/test_storage_hdfs/test.py | 4 +- 21 files changed, 279 insertions(+), 86 deletions(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 2d03de60c3c..db79ff365aa 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #if USE_HDFS @@ -18,6 +19,7 @@ namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; extern const int HDFS_ERROR; + extern const int ACCESS_DENIED; } void HDFSObjectStorage::shutdown() @@ -48,7 +50,7 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLIN std::optional, std::optional) const { - return std::make_unique(object.remote_path, object.remote_path, config, patchSettings(read_settings)); + return std::make_unique(hdfs_root_path, object.remote_path, config, patchSettings(read_settings)); } std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI @@ -62,12 +64,12 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI [this, disk_read_settings] (bool /* restricted_seek */, const std::string & path) -> std::unique_ptr { - size_t begin_of_path = path.find('/', path.find("//") + 2); - auto hdfs_path = path.substr(begin_of_path); - auto hdfs_uri = path.substr(0, begin_of_path); + // size_t begin_of_path = path.find('/', path.find("//") + 2); + // auto hdfs_path = path.substr(begin_of_path); + // auto hdfs_uri = path.substr(0, begin_of_path); return std::make_unique( - hdfs_uri, hdfs_path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); + hdfs_root_path, path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); }; return std::make_unique( @@ -131,7 +133,8 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co { auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); if (!file_info) - throw Exception(ErrorCodes::HDFS_ERROR, "Cannot get file info for: {}. Error: {}", path, hdfsGetLastError()); + throw Exception(ErrorCodes::HDFS_ERROR, + "Cannot get file info for: {}. Error: {}", path, hdfsGetLastError()); ObjectMetadata metadata; metadata.size_bytes = static_cast(file_info->mSize); @@ -141,6 +144,54 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co return metadata; } +void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const +{ + auto * log = &Poco::Logger::get("HDFSObjectStorage"); + LOG_TRACE(log, "Trying to list files for {}", path); + + HDFSFileInfo ls; + ls.file_info = hdfsListDirectory(hdfs_fs.get(), path.data(), &ls.length); + + if (ls.file_info == nullptr && errno != ENOENT) // NOLINT + { + // ignore the file-not-found exception but keep throwing other exceptions; + // libhdfs3 doesn't have a function to get the exception type, so use errno.
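+ // (Assumption drawn from this check: ENOENT just means the directory does not exist, so the loop below sees zero entries and an empty listing is returned; any other errno is surfaced as ACCESS_DENIED.)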
+ throw Exception(ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", + path, String(hdfsGetLastError())); + } + + if (!ls.file_info && ls.length > 0) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); + } + + LOG_TRACE(log, "Listed {} files for {}", ls.length, path); + + for (int i = 0; i < ls.length; ++i) + { + const String file_path = fs::path(ls.file_info[i].mName).lexically_normal(); + const size_t last_slash = file_path.rfind('/'); + const String file_name = file_path.substr(last_slash); + + const bool is_directory = ls.file_info[i].mKind == 'D'; + if (is_directory) + { + listObjects(fs::path(file_path) / "", children, max_keys); + } + else + { + LOG_TEST(log, "Found file: {}", file_path); + + children.emplace_back(std::make_shared( + String(file_path), + ObjectMetadata{ + static_cast(ls.file_info[i].mSize), + Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod), + {}})); + } + } +} + void HDFSObjectStorage::copyObject( /// NOLINT const StoredObject & object_from, const StoredObject & object_to, @@ -160,7 +211,10 @@ void HDFSObjectStorage::copyObject( /// NOLINT } -std::unique_ptr HDFSObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr) +std::unique_ptr HDFSObjectStorage::cloneObjectStorage( + const std::string &, + const Poco::Util::AbstractConfiguration &, + const std::string &, ContextPtr) { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS object storage doesn't support cloning"); } diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index f92e160fd4d..24642ec635a 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -92,6 +92,8 @@ public: const WriteSettings & write_settings, std::optional object_to_attributes = {}) override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; + void shutdown() override; void startup() override; diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h index e934fc2056d..26c3c690ba5 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.h +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h @@ -27,9 +27,7 @@ class ObjectStorageIteratorFromList : public IObjectStorageIterator public: explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_) : batch(std::move(batch_)) - , batch_iterator(batch.begin()) - { - } + , batch_iterator(batch.begin()) {} void next() override { @@ -37,21 +35,23 @@ public: ++batch_iterator; } - void nextBatch() override - { - batch_iterator = batch.end(); - } + void nextBatch() override { batch_iterator = batch.end(); } - bool isValid() override - { - return batch_iterator != batch.end(); - } + bool isValid() override { return batch_iterator != batch.end(); } RelativePathWithMetadataPtr current() override; RelativePathsWithMetadata currentBatch() override { return batch; } - std::optional getCurrentBatchAndScheduleNext() override { return std::nullopt; } + std::optional getCurrentBatchAndScheduleNext() override + { + if (batch.empty()) + return {}; + + auto current_batch = std::move(batch); + batch = {}; + return current_batch; + } size_t getAccumulatedSize() const override { return batch.size(); } diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 
f441b18d59d..94a0751dcc8 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -21,6 +21,18 @@ IObjectStorageIteratorAsync::IObjectStorageIteratorAsync( { } +IObjectStorageIteratorAsync::~IObjectStorageIteratorAsync() +{ + if (!deactivated) + deactivate(); +} + +void IObjectStorageIteratorAsync::deactivate() +{ + list_objects_pool.wait(); + deactivated = true; +} + void IObjectStorageIteratorAsync::nextBatch() { std::lock_guard lock(mutex); diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index c4bde91f415..3e3269fb550 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -19,18 +19,20 @@ public: CurrentMetrics::Metric threads_scheduled_metric, const std::string & thread_name); - void next() override; - void nextBatch() override; + ~IObjectStorageIteratorAsync() override; + bool isValid() override; + RelativePathWithMetadataPtr current() override; RelativePathsWithMetadata currentBatch() override; + + void next() override; + void nextBatch() override; + size_t getAccumulatedSize() const override; std::optional getCurrentBatchAndScheduleNext() override; - ~IObjectStorageIteratorAsync() override - { - list_objects_pool.wait(); - } + void deactivate(); protected: @@ -46,6 +48,7 @@ protected: bool is_initialized{false}; bool is_finished{false}; + bool deactivated{false}; mutable std::recursive_mutex mutex; ThreadPool list_objects_pool; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 33c0afda4c1..d902a33ae4a 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -110,10 +110,19 @@ public: CurrentMetrics::ObjectStorageS3ThreadsScheduled, "ListObjectS3") , client(client_) + , request(std::make_unique()) { - request.SetBucket(bucket_); - request.SetPrefix(path_prefix); - request.SetMaxKeys(static_cast(max_list_size)); + request->SetBucket(bucket_); + request->SetPrefix(path_prefix); + request->SetMaxKeys(static_cast(max_list_size)); + } + + ~S3IteratorAsync() override + { + /// Deactivate background threads before resetting the request to avoid data race. + deactivate(); + request.reset(); + client.reset(); } private: @@ -121,12 +130,12 @@ private: { ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); + auto outcome = client->ListObjectsV2(*request); /// Outcome failure will be handled on the caller side. 
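/// (A failed outcome skips the success branch below and reaches the S3Exception at the end of this method; only a successful outcome advances the continuation token.)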
if (outcome.IsSuccess()) { - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + request->SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); auto objects = outcome.GetResult().GetContents(); for (const auto & object : objects) @@ -141,12 +150,12 @@ private: throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), + quoteString(request->GetBucket()), quoteString(request->GetPrefix()), backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } std::shared_ptr client; - S3::ListObjectsV2Request request; + std::unique_ptr request; }; } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 5edc660d717..50e8918a12e 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -8,6 +8,8 @@ #include #include #include +#include + namespace DB { @@ -28,7 +30,7 @@ StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguratio void StorageHDFSConfiguration::check(ContextPtr context) const { context->getRemoteHostFilter().checkURL(Poco::URI(url)); - checkHDFSURL(fs::path(url) / path); + checkHDFSURL(fs::path(url) / path.substr(1)); } ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT @@ -44,9 +46,22 @@ ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT return std::make_shared(url, std::move(hdfs_settings), context->getConfigRef()); } -void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool /* with_structure */) +std::string StorageHDFSConfiguration::getPathWithoutGlob() const { - url = checkAndGetLiteralArgument(args[0], "url"); + /// Unlike s3 and azure, which are object storages, + /// hdfs is a filesystem, so it cannot list files by partial prefix, + /// only by directory.
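+ /// E.g. a path like '/data/2024/part-*.parquet' (an illustrative path, not one used in this patch) yields '/data/2024', while an object storage could list directly by the partial prefix '/data/2024/part-'.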
+ auto first_glob_pos = path.find_first_of("*?{"); + auto end_of_path_without_globs = path.substr(0, first_glob_pos).rfind('/'); + if (end_of_path_without_globs == std::string::npos || end_of_path_without_globs == 0) + return "/"; + return path.substr(0, end_of_path_without_globs); +} + +void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) +{ + std::string url_str; + url_str = checkAndGetLiteralArgument(args[0], "url"); if (args.size() > 1) { @@ -54,28 +69,60 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool /* format = checkAndGetLiteralArgument(args[1], "format_name"); } - if (args.size() == 3) + if (with_structure) + { + if (args.size() > 2) + { + structure = checkAndGetLiteralArgument(args[2], "structure"); + } + if (args.size() > 3) + { + args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context); + compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); + } + } + else if (args.size() > 2) { args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); } - auto pos = url.find("//"); - if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid url: {}", url); - - pos = url.find('/', pos + 2); - if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid url: {}", url); - - path = url.substr(pos + 1); - url = url.substr(0, pos); - paths = {path}; + setURL(url_str); } -void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection &) +void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection & collection) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method fromNamedColection() is not implemented"); + std::string url_str; + + auto filename = collection.getOrDefault("filename", ""); + if (!filename.empty()) + url_str = std::filesystem::path(collection.get("url")) / filename; + else + url_str = collection.get("url"); + + format = collection.getOrDefault("format", "auto"); + compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + structure = collection.getOrDefault("structure", "auto"); + + setURL(url_str); +} + +void StorageHDFSConfiguration::setURL(const std::string url_) +{ + auto pos = url_.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); + + pos = url_.find('/', pos + 2); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); + + path = url_.substr(pos + 1); + url = url_.substr(0, pos); + path = '/' + path; + paths = {path}; + + LOG_TRACE(getLogger("StorageHDFSConfiguration"), "Using url: {}, path: {}", url, path); } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 5765edbf36c..8506c7c9700 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -31,9 +31,12 @@ public: static void addStructureToArgs(ASTs &, const String &, ContextPtr) {} + std::string getPathWithoutGlob() const override; + private: void fromNamedCollection(const NamedCollection &) override; void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; + void setURL(const std::string url_); String url; String path; diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp 
b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index 18b22805dfc..c29189804e6 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -55,10 +56,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) - , builder(createHDFSBuilder(hdfs_uri_, config_)) , read_settings(read_settings_) , read_until_position(read_until_position_) { + builder = createHDFSBuilder(hdfs_uri_, config_); fs = createHDFSFS(builder.get()); fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0); @@ -96,11 +97,14 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory {})", file_offset, read_until_position - 1); @@ -111,10 +115,11 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory= file_size) - { - return false; - } + // if (file_size != 0 && file_offset >= file_size) + // { + // LOG_TEST(log, "KSSENII 1 2"); + // return false; + // } ResourceGuard rlock(read_settings.resource_link, num_bytes_to_read); int bytes_read; @@ -145,6 +150,8 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemoryadd(bytes_read, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); + + LOG_TEST(log, "KSSENII SIZE: {}", bytes_read); return true; } diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp index ce157972161..f2595299430 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -49,8 +49,8 @@ void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predi { auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, distributed_processing, context, predicate, - virtual_columns, nullptr, query_settings.list_object_keys_size, metric_threads_count, + configuration, object_storage, query_settings, distributed_processing, + context, predicate, virtual_columns, nullptr, metric_threads_count, metric_threads_active, metric_threads_scheduled, context->getFileProgressCallback()); } } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 8d85224cff0..0276ff62778 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -288,8 +288,8 @@ std::unique_ptr StorageObjectStorage::creat { const auto settings = StorageSettings::create(context->getSettingsRef()); auto file_iterator = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, /* distributed_processing */false, - context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size, + configuration, object_storage, settings, /* distributed_processing */false, + context, /* predicate */{}, /* virtual_columns */{}, &read_keys, StorageSettings::ObjectStorageThreads(), StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); return std::make_unique( diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 
c5421f1d319..f023bb068d4 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -92,7 +92,8 @@ StorageObjectStorageCluster::getTask const auto settings = StorageSettings::create(local_context->getSettingsRef()); auto iterator = std::make_shared( object_storage, configuration, predicate, virtual_columns, local_context, - nullptr, settings.list_object_keys_size, local_context->getFileProgressCallback()); + nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match, + local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 1d5c0cd3a39..61e569cee05 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -40,7 +40,8 @@ StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const Stora bool StorageObjectStorageConfiguration::withWildcard() const { static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return getPath().find(PARTITION_ID_WILDCARD) != String::npos; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos + || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; } bool StorageObjectStorageConfiguration::isPathWithGlobs() const diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 0beed91b128..48825c6a012 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -40,7 +40,7 @@ public: bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } bool isPathWithGlobs() const; bool isNamespaceWithGlobs() const; - std::string getPathWithoutGlob() const; + virtual std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; virtual void validateNamespace(const String & /* name */) const {} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h index 454da7c355f..8bcc2ad3b37 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h @@ -25,6 +25,7 @@ struct StorageObjectStorageSettings SchemaInferenceMode schema_inference_mode; bool skip_empty_files; size_t list_object_keys_size; + bool throw_on_zero_files_match; }; struct S3StorageSettings @@ -38,6 +39,7 @@ struct S3StorageSettings .schema_inference_mode = settings.schema_inference_mode, .skip_empty_files = settings.s3_skip_empty_files, .list_object_keys_size = settings.s3_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, }; } @@ -59,6 +61,7 @@ struct AzureStorageSettings .schema_inference_mode = settings.schema_inference_mode, .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure .list_object_keys_size = settings.azure_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, }; } @@ -80,6 +83,7 @@ struct HDFSStorageSettings .schema_inference_mode = settings.schema_inference_mode, .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for hdfs .list_object_keys_size = 
settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 3c8484194c9..5a88f1436c1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes extern const int CANNOT_COMPILE_REGEXP; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int FILE_DOESNT_EXIST; } StorageObjectStorageSource::StorageObjectStorageSource( @@ -75,12 +76,12 @@ StorageObjectStorageSource::~StorageObjectStorageSource() std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, + const StorageObjectStorageSettings & settings, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - size_t list_object_keys_size, CurrentMetrics::Metric metric_threads_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_, @@ -99,12 +100,14 @@ std::shared_ptr StorageObjectStorageSourc { /// Iterate through disclosed globs and make a source for each file return std::make_shared( - object_storage, configuration, predicate, virtual_columns, local_context, read_keys, list_object_keys_size, file_progress_callback); + object_storage, configuration, predicate, virtual_columns, local_context, + read_keys, settings.list_object_keys_size, settings.throw_on_zero_files_match, file_progress_callback); } else { return std::make_shared( - object_storage, configuration, virtual_columns, read_keys, file_progress_callback); + object_storage, configuration, virtual_columns, read_keys, + settings.throw_on_zero_files_match, file_progress_callback); } } @@ -209,6 +212,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade do { object_info = file_iterator->next(processor); + if (!object_info || object_info->relative_path.empty()) return {}; @@ -226,8 +230,11 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade ? tryGetNumRowsFromCache(object_info) : std::nullopt; + LOG_TRACE(&Poco::Logger::get("kssenii"), "HAS NUM ROWS FROM CACHE: {}", num_rows_from_cache.has_value()); if (num_rows_from_cache) { + LOG_TRACE(&Poco::Logger::get("kssenii"), "NUM ROWS FROM CACHE: {}", num_rows_from_cache.value()); + /// We should not return single chunk with all number of rows, /// because there is a chance that this chunk will be materialized later /// (it can cause memory problems even with default values in columns or when virtual columns are requested). 
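A sketch of the iteration contract introduced by the hunk below (file_iterator and processor come from this patch; the loop and readFile() are assumed for illustration). Concrete iterators now implement nextImpl(), while the base IIterator::next() centralizes per-key logging and the throw_on_zero_files_match check:

    while (auto object_info = file_iterator->next(processor))
    {
        /// next() delegates to GlobIterator/KeysIterator/ReadTaskIterator::nextImpl();
        /// on the very first call it throws FILE_DOESNT_EXIST if nothing matched and
        /// throw_on_zero_files_match is set; nullptr means the listing is exhausted.
        readFile(object_info->relative_path); /// hypothetical consumer
    }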
@@ -324,6 +331,29 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S } } +StorageObjectStorageSource::IIterator::IIterator(bool throw_on_zero_files_match_, const std::string & logger_name_) + : throw_on_zero_files_match(throw_on_zero_files_match_) + , logger(getLogger(logger_name_)) +{ +} + +ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) +{ + auto object_info = nextImpl(processor); + + if (object_info) + { + first_iteration = false; + LOG_TEST(&Poco::Logger::get("KeysIterator"), "Next key: {}", object_info->relative_path); + } + else if (first_iteration && throw_on_zero_files_match) + { + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files"); + } + + return object_info; +} + StorageObjectStorageSource::GlobIterator::GlobIterator( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, @@ -332,8 +362,10 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( ContextPtr context_, ObjectInfos * read_keys_, size_t list_object_keys_size, + bool throw_on_zero_files_match_, std::function file_progress_callback_) - : WithContext(context_) + : IIterator(throw_on_zero_files_match_, "GlobIterator") + , WithContext(context_) , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) @@ -380,7 +412,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } -ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor */) +ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t /* processor */) { std::lock_guard lock(next_mutex); @@ -401,9 +433,10 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor } new_batch = std::move(result.value()); + LOG_TEST(logger, "Batch size: {}", new_batch.size()); + for (auto it = new_batch.begin(); it != new_batch.end();) { - chassert(*it); if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) it = new_batch.erase(it); else @@ -452,8 +485,10 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, + bool throw_on_zero_files_match_, std::function file_progress_callback_) - : object_storage(object_storage_) + : IIterator(throw_on_zero_files_match_, "KeysIterator") + , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) , file_progress_callback(file_progress_callback_) @@ -470,7 +505,7 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( } } -ObjectInfoPtr StorageObjectStorageSource::KeysIterator::next(size_t /* processor */) +ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) @@ -520,7 +555,8 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( CurrentMetrics::Metric metric_threads_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_) - : callback(callback_) + : IIterator(false, "ReadTaskIterator") + , callback(callback_) { ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); auto pool_scheduler = threadPoolCallbackRunner(pool, "ReadTaskIter"); @@ -540,7 +576,7 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( } } -ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::next(size_t) +ObjectInfoPtr 
StorageObjectStorageSource::ReadTaskIterator::nextImpl(size_t) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= buffer.size()) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index d02cb4a3a90..7c5497a6eaa 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -53,12 +53,12 @@ public: static std::shared_ptr createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, + const StorageObjectStorageSettings & settings, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - size_t list_object_keys_size, CurrentMetrics::Metric metric_threads_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_, @@ -133,10 +133,21 @@ protected: class StorageObjectStorageSource::IIterator { public: + IIterator(bool throw_on_zero_files_match_, const std::string & logger_name_); + virtual ~IIterator() = default; virtual size_t estimatedKeysCount() = 0; - virtual ObjectInfoPtr next(size_t processor) = 0; + + ObjectInfoPtr next(size_t processor); + +protected: + virtual ObjectInfoPtr nextImpl(size_t processor) = 0; + +protected: + const bool throw_on_zero_files_match; + bool first_iteration = true; + LoggerPtr logger; }; class StorageObjectStorageSource::ReadTaskIterator : public IIterator @@ -151,9 +162,9 @@ public: size_t estimatedKeysCount() override { return buffer.size(); } - ObjectInfoPtr next(size_t) override; - private: + ObjectInfoPtr nextImpl(size_t) override; + ReadTaskCallback callback; ObjectInfos buffer; std::atomic_size_t index = 0; @@ -170,15 +181,17 @@ public: ContextPtr context_, ObjectInfos * read_keys_, size_t list_object_keys_size, + bool throw_on_zero_files_match_, std::function file_progress_callback_ = {}); ~GlobIterator() override = default; size_t estimatedKeysCount() override { return object_infos.size(); } - ObjectInfoPtr next(size_t processor) override; - private: + ObjectInfoPtr nextImpl(size_t processor) override; + void createFilterAST(const String & any_key); + ObjectStoragePtr object_storage; ConfigurationPtr configuration; ActionsDAGPtr filter_dag; @@ -193,7 +206,6 @@ private: std::unique_ptr matcher; - void createFilterAST(const String & any_key); bool is_finished = false; std::mutex next_mutex; @@ -208,15 +220,16 @@ public: ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, + bool throw_on_zero_files_match_, std::function file_progress_callback = {}); ~KeysIterator() override = default; size_t estimatedKeysCount() override { return keys.size(); } - ObjectInfoPtr next(size_t processor) override; - private: + ObjectInfoPtr nextImpl(size_t processor) override; + const ObjectStoragePtr object_storage; const ConfigurationPtr configuration; const NamesAndTypesList virtual_columns; diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 42cd210018a..ee3071ea71f 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -45,7 +45,8 @@ StorageS3QueueSource::FileIterator::FileIterator( std::unique_ptr glob_iterator_, size_t current_shard_, std::atomic & shutdown_called_) - : metadata(metadata_) + : StorageObjectStorageSource::IIterator(false, "S3QueueIterator") + , 
metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) , log(&Poco::Logger::get("StorageS3QueueSource")) @@ -59,7 +60,7 @@ StorageS3QueueSource::FileIterator::FileIterator( } } -StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::next(size_t processor) +StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) { while (!shutdown_called) { diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 2bdac7f2311..8c785e683c2 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -56,7 +56,7 @@ public: /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - ObjectInfoPtr next(size_t processor) override; + ObjectInfoPtr nextImpl(size_t processor) override; size_t estimatedKeysCount() override; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index aafcdc39f9e..c5799d23abd 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -598,7 +598,7 @@ std::shared_ptr StorageS3Queue::createFileIterator { auto settings = S3StorageSettings::create(local_context->getSettingsRef()); auto glob_iterator = std::make_unique( - object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size); + object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match); return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 9dec1954406..5632c7ae060 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -61,7 +61,7 @@ def test_read_write_storage_with_globs(started_cluster): hdfs_api.write_data("/storage" + i, i + "\tMark\t72.53\n") assert hdfs_api.read_data("/storage" + i) == i + "\tMark\t72.53\n" - assert node1.query("select count(*) from HDFSStorageWithRange") == "3\n" + assert node1.query("select count(*) from HDFSStorageWithRange settings s3_throw_on_zero_files_match=1") == "3\n" assert node1.query("select count(*) from HDFSStorageWithEnum") == "3\n" assert node1.query("select count(*) from HDFSStorageWithQuestionMark") == "3\n" assert node1.query("select count(*) from HDFSStorageWithAsterisk") == "3\n" @@ -159,7 +159,7 @@ def test_bad_hdfs_uri(started_cluster): ) except Exception as ex: print(ex) - assert "Unable to create builder to connect to HDFS" in str(ex) + assert "Unable to connect to HDFS" in str(ex) try: node1.query( From 480251e5932f2d15891a403887b5afc96f40ee89 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 27 Mar 2024 19:28:11 +0100 Subject: [PATCH 069/651] Fix style check --- tests/integration/test_storage_hdfs/test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 5632c7ae060..f6e486d6594 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -61,7 +61,12 @@ def test_read_write_storage_with_globs(started_cluster): hdfs_api.write_data("/storage" + i, i + "\tMark\t72.53\n") assert 
hdfs_api.read_data("/storage" + i) == i + "\tMark\t72.53\n" - assert node1.query("select count(*) from HDFSStorageWithRange settings s3_throw_on_zero_files_match=1") == "3\n" + assert ( + node1.query( + "select count(*) from HDFSStorageWithRange settings s3_throw_on_zero_files_match=1" + ) + == "3\n" + ) assert node1.query("select count(*) from HDFSStorageWithEnum") == "3\n" assert node1.query("select count(*) from HDFSStorageWithQuestionMark") == "3\n" assert node1.query("select count(*) from HDFSStorageWithAsterisk") == "3\n" From a2e210462d7d78212c32408ea3d276ef366b57c4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 27 Mar 2024 22:31:22 +0100 Subject: [PATCH 070/651] Fix style check --- src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp | 1 + src/Storages/ObjectStorage/HDFS/Configuration.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index db79ff365aa..9bc75b740e5 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; extern const int HDFS_ERROR; extern const int ACCESS_DENIED; + extern const int LOGICAL_ERROR; } void HDFSObjectStorage::shutdown() diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 50e8918a12e..3828afc0bea 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -16,7 +16,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int NOT_IMPLEMENTED; } StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) From 5c63d09c5bb91f7dc159befeb505a74e4c0257a5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 28 Mar 2024 14:15:14 +0100 Subject: [PATCH 071/651] More tests fixes --- src/Core/Settings.h | 3 + .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 4 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 8 +- .../ObjectStorage/HDFS/Configuration.cpp | 18 ++++- .../ObjectStorage/HDFS/Configuration.h | 2 +- .../StorageObjectStorageQuerySettings.h | 4 + .../StorageObjectStorageSource.cpp | 76 ++++++++++++------- .../StorageObjectStorageSource.h | 20 ++--- src/Storages/S3Queue/S3QueueSource.cpp | 2 +- tests/integration/test_storage_hdfs/test.py | 12 +-- .../0_stateless/02725_database_hdfs.sh | 3 +- 11 files changed, 98 insertions(+), 54 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f8f3595094c..2fae390c35b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -113,6 +113,9 @@ class IColumn; M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. 
It may lead to slightly higher memory usage", 0) \
     M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
+    M(Bool, s3_ignore_file_doesnt_exist, false, "Ignore if files do not exist and return 0 rows for StorageS3", 0) \
+    M(Bool, hdfs_ignore_file_doesnt_exist, false, "Ignore if files do not exist and return 0 rows for StorageHDFS", 0) \
+    M(Bool, azure_ignore_file_doesnt_exist, false, "Ignore if files do not exist and return 0 rows for StorageAzure", 0) \
     M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
     M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
     M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
index 85d3e921f22..8bfba6fcfad 100644
--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
@@ -103,10 +103,10 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL
 void HDFSObjectStorage::removeObject(const StoredObject & object)
 {
     const auto & path = object.remote_path;
-    const size_t begin_of_path = path.find('/', path.find("//") + 2);
+    // const size_t begin_of_path = path.find('/', path.find("//") + 2);
 
     /// Add path from root to file name
-    int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0);
+    int res = hdfsDelete(hdfs_fs.get(), path.c_str(), 0);
     if (res == -1)
         throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: {}", path);
 
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index b9995620c0f..9085fddfd08 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -432,7 +432,9 @@ void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
 std::optional S3ObjectStorage::tryGetObjectMetadata(const std::string & path) const
 {
     auto settings_ptr = s3_settings.get();
-    auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false);
+    auto object_info = S3::getObjectInfo(
+        *client.get(), uri.bucket, path, {}, settings_ptr->request_settings,
+        /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false);
 
     if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty())
         return {};
@@ -448,7 +450,9 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s
 ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const
 {
     auto settings_ptr = s3_settings.get();
-    auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true);
+    auto object_info = S3::getObjectInfo(
+        *client.get(), uri.bucket, path, {}, 
settings_ptr->request_settings, + /* with_metadata= */ true, /* for_disk_s3= */ true); ObjectMetadata result; result.size_bytes = object_info.size; diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 3828afc0bea..594f0b89454 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) @@ -62,6 +63,13 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit std::string url_str; url_str = checkAndGetLiteralArgument(args[0], "url"); + const size_t max_args_num = with_structure ? 4 : 3; + if (args.size() > max_args_num) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Expected not more than {} arguments", max_args_num); + } + if (args.size() > 1) { args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); @@ -72,6 +80,7 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit { if (args.size() > 2) { + args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); structure = checkAndGetLiteralArgument(args[2], "structure"); } if (args.size() > 3) @@ -100,13 +109,14 @@ void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection & colle url_str = collection.get("url"); format = collection.getOrDefault("format", "auto"); - compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + compression_method = collection.getOrDefault("compression_method", + collection.getOrDefault("compression", "auto")); structure = collection.getOrDefault("structure", "auto"); setURL(url_str); } -void StorageHDFSConfiguration::setURL(const std::string url_) +void StorageHDFSConfiguration::setURL(const std::string & url_) { auto pos = url_.find("//"); if (pos == std::string::npos) @@ -117,8 +127,10 @@ void StorageHDFSConfiguration::setURL(const std::string url_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); path = url_.substr(pos + 1); + if (!path.starts_with('/')) + path = '/' + path; + url = url_.substr(0, pos); - path = '/' + path; paths = {path}; LOG_TRACE(getLogger("StorageHDFSConfiguration"), "Using url: {}, path: {}", url, path); diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 8506c7c9700..7154f790665 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -36,7 +36,7 @@ public: private: void fromNamedCollection(const NamedCollection &) override; void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; - void setURL(const std::string url_); + void setURL(const std::string & url_); String url; String path; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h index 8bcc2ad3b37..f0687776aa7 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h @@ -26,6 +26,7 @@ struct StorageObjectStorageSettings bool skip_empty_files; size_t list_object_keys_size; bool throw_on_zero_files_match; + bool ignore_non_existent_file; }; struct S3StorageSettings 
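As a usage sketch for the new per-engine settings wired through this struct (table and path names are illustrative, mirroring the integration tests later in this series), a read over an explicitly listed key that is missing then returns an empty result instead of throwing:

SELECT * FROM HDFSStorageWithNotExistDir SETTINGS hdfs_ignore_file_doesnt_exist = 1;
SELECT * FROM s3('http://localhost:11111/test/missing.tsv', 'TSV', 'id UInt32') SETTINGS s3_ignore_file_doesnt_exist = 1;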
@@ -40,6 +41,7 @@ struct S3StorageSettings .skip_empty_files = settings.s3_skip_empty_files, .list_object_keys_size = settings.s3_list_object_keys_size, .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist, }; } @@ -62,6 +64,7 @@ struct AzureStorageSettings .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure .list_object_keys_size = settings.azure_list_object_keys_size, .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, }; } @@ -84,6 +87,7 @@ struct HDFSStorageSettings .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for hdfs .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 5a88f1436c1..80aa0c210e9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -100,14 +100,15 @@ std::shared_ptr StorageObjectStorageSourc { /// Iterate through disclosed globs and make a source for each file return std::make_shared( - object_storage, configuration, predicate, virtual_columns, local_context, - read_keys, settings.list_object_keys_size, settings.throw_on_zero_files_match, file_progress_callback); + object_storage, configuration, predicate, virtual_columns, + local_context, read_keys, settings.list_object_keys_size, + settings.throw_on_zero_files_match, file_progress_callback); } else { return std::make_shared( object_storage, configuration, virtual_columns, read_keys, - settings.throw_on_zero_files_match, file_progress_callback); + settings.ignore_non_existent_file, file_progress_callback); } } @@ -331,9 +332,8 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S } } -StorageObjectStorageSource::IIterator::IIterator(bool throw_on_zero_files_match_, const std::string & logger_name_) - : throw_on_zero_files_match(throw_on_zero_files_match_) - , logger(getLogger(logger_name_)) +StorageObjectStorageSource::IIterator::IIterator(const std::string & logger_name_) + : logger(getLogger(logger_name_)) { } @@ -343,13 +343,8 @@ ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) if (object_info) { - first_iteration = false; LOG_TEST(&Poco::Logger::get("KeysIterator"), "Next key: {}", object_info->relative_path); } - else if (first_iteration && throw_on_zero_files_match) - { - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files"); - } return object_info; } @@ -364,11 +359,12 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( size_t list_object_keys_size, bool throw_on_zero_files_match_, std::function file_progress_callback_) - : IIterator(throw_on_zero_files_match_, "GlobIterator") + : IIterator("GlobIterator") , WithContext(context_) , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) + , throw_on_zero_files_match(throw_on_zero_files_match_) , read_keys(read_keys_) , file_progress_callback(file_progress_callback_) { @@ -412,10 +408,24 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } -ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t /* 
processor */) +ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) { std::lock_guard lock(next_mutex); + auto object_info = nextImplUnlocked(processor); + if (object_info) + { + if (first_iteration) + first_iteration = false; + } + else if (first_iteration && throw_on_zero_files_match) + { + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files"); + } + return object_info; +} +ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImplUnlocked(size_t /* processor */) +{ bool current_batch_processed = object_infos.empty() || index >= object_infos.size(); if (is_finished && current_batch_processed) return {}; @@ -485,14 +495,15 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, - bool throw_on_zero_files_match_, + bool ignore_non_existent_files_, std::function file_progress_callback_) - : IIterator(throw_on_zero_files_match_, "KeysIterator") + : IIterator("KeysIterator") , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) , file_progress_callback(file_progress_callback_) , keys(configuration->getPaths()) + , ignore_non_existent_files(ignore_non_existent_files_) { if (read_keys_) { @@ -507,20 +518,29 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - auto key = keys[current_index]; - - ObjectMetadata metadata{}; - if (file_progress_callback) + while (true) { - metadata = object_storage->getObjectMetadata(key); - file_progress_callback(FileProgress(0, metadata.size_bytes)); - } + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= keys.size()) + return {}; - return std::make_shared(key, metadata); + auto key = keys[current_index]; + + ObjectMetadata object_metadata{}; + if (ignore_non_existent_files) + { + auto metadata = object_storage->tryGetObjectMetadata(key); + if (!metadata) + continue; + } + else + object_metadata = object_storage->getObjectMetadata(key); + + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); + + return std::make_shared(key, object_metadata); + } } StorageObjectStorageSource::ReaderHolder::ReaderHolder( @@ -555,7 +575,7 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( CurrentMetrics::Metric metric_threads_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_) - : IIterator(false, "ReadTaskIterator") + : IIterator("ReadTaskIterator") , callback(callback_) { ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 7c5497a6eaa..3d4cc4fbd20 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -133,7 +133,7 @@ protected: class StorageObjectStorageSource::IIterator { public: - IIterator(bool throw_on_zero_files_match_, const std::string & logger_name_); + explicit IIterator(const std::string & logger_name_); virtual ~IIterator() = default; @@ -143,10 +143,6 @@ public: protected: virtual ObjectInfoPtr nextImpl(size_t processor) = 0; - 
-protected: - const bool throw_on_zero_files_match; - bool first_iteration = true; LoggerPtr logger; }; @@ -190,23 +186,26 @@ public: private: ObjectInfoPtr nextImpl(size_t processor) override; + ObjectInfoPtr nextImplUnlocked(size_t processor); void createFilterAST(const String & any_key); - ObjectStoragePtr object_storage; - ConfigurationPtr configuration; - ActionsDAGPtr filter_dag; - NamesAndTypesList virtual_columns; + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const NamesAndTypesList virtual_columns; + const bool throw_on_zero_files_match; size_t index = 0; ObjectInfos object_infos; ObjectInfos * read_keys; + ActionsDAGPtr filter_dag; ObjectStorageIteratorPtr object_storage_iterator; bool recursive{false}; std::unique_ptr matcher; bool is_finished = false; + bool first_iteration = true; std::mutex next_mutex; std::function file_progress_callback; @@ -220,7 +219,7 @@ public: ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, - bool throw_on_zero_files_match_, + bool ignore_non_existent_files_, std::function file_progress_callback = {}); ~KeysIterator() override = default; @@ -236,5 +235,6 @@ private: const std::function file_progress_callback; const std::vector keys; std::atomic index = 0; + bool ignore_non_existent_files; }; } diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index ee3071ea71f..8e7155205c4 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -45,7 +45,7 @@ StorageS3QueueSource::FileIterator::FileIterator( std::unique_ptr glob_iterator_, size_t current_shard_, std::atomic & shutdown_called_) - : StorageObjectStorageSource::IIterator(false, "S3QueueIterator") + : StorageObjectStorageSource::IIterator("S3QueueIterator") , metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index f6e486d6594..fbf97adcee0 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -326,7 +326,7 @@ def test_virtual_columns(started_cluster): hdfs_api.write_data("/file1", "1\n") hdfs_api.write_data("/file2", "2\n") hdfs_api.write_data("/file3", "3\n") - expected = "1\tfile1\thdfs://hdfs1:9000/file1\n2\tfile2\thdfs://hdfs1:9000/file2\n3\tfile3\thdfs://hdfs1:9000/file3\n" + expected = "1\tfile1\t/file1\n2\tfile2\t/file2\n3\tfile3\t/file3\n" assert ( node1.query( "select id, _file as file_name, _path as file_path from virtual_cols order by id" @@ -365,7 +365,7 @@ def test_truncate_table(started_cluster): assert hdfs_api.read_data("/tr") == "1\tMark\t72.53\n" assert node1.query("select * from test_truncate") == "1\tMark\t72.53\n" node1.query("truncate table test_truncate") - assert node1.query("select * from test_truncate") == "" + assert node1.query("select * from test_truncate settings hdfs_ignore_file_doesnt_exist=1") == "" node1.query("drop table test_truncate") @@ -488,13 +488,13 @@ def test_hdfsCluster(started_cluster): actual = node1.query( "select id, _file as file_name, _path as file_path from hdfs('hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\thdfs://hdfs1:9000/test_hdfsCluster/file1\n2\tfile2\thdfs://hdfs1:9000/test_hdfsCluster/file2\n3\tfile3\thdfs://hdfs1:9000/test_hdfsCluster/file3\n" + expected = 
"1\tfile1\t/test_hdfsCluster/file1\n2\tfile2\t/test_hdfsCluster/file2\n3\tfile3\t/test_hdfsCluster/file3\n" assert actual == expected actual = node1.query( "select id, _file as file_name, _path as file_path from hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\thdfs://hdfs1:9000/test_hdfsCluster/file1\n2\tfile2\thdfs://hdfs1:9000/test_hdfsCluster/file2\n3\tfile3\thdfs://hdfs1:9000/test_hdfsCluster/file3\n" + expected = "1\tfile1\t/test_hdfsCluster/file1\n2\tfile2\t/test_hdfsCluster/file2\n3\tfile3\t/test_hdfsCluster/file3\n" assert actual == expected fs.delete(dir, recursive=True) @@ -502,7 +502,7 @@ def test_hdfsCluster(started_cluster): def test_hdfs_directory_not_exist(started_cluster): ddl = "create table HDFSStorageWithNotExistDir (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/data/not_eixst', 'TSV')" node1.query(ddl) - assert "" == node1.query("select * from HDFSStorageWithNotExistDir") + assert "" == node1.query("select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1") def test_overwrite(started_cluster): @@ -658,7 +658,7 @@ def test_virtual_columns_2(started_cluster): node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") - assert result.strip() == "hdfs://hdfs1:9000/parquet_2" + assert result.strip() == "/parquet_2" table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index d62f928e947..623af707542 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -60,7 +60,8 @@ SELECT * FROM \"abacaba/file.tsv\" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: - +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: From 961704ba173bef199735c52e5296b371a5168f15 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 28 Mar 2024 15:00:49 +0100 Subject: [PATCH 072/651] Style check --- tests/integration/test_storage_hdfs/test.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index fbf97adcee0..77a55ced5c8 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -365,7 +365,12 @@ def test_truncate_table(started_cluster): assert hdfs_api.read_data("/tr") == 
"1\tMark\t72.53\n" assert node1.query("select * from test_truncate") == "1\tMark\t72.53\n" node1.query("truncate table test_truncate") - assert node1.query("select * from test_truncate settings hdfs_ignore_file_doesnt_exist=1") == "" + assert ( + node1.query( + "select * from test_truncate settings hdfs_ignore_file_doesnt_exist=1" + ) + == "" + ) node1.query("drop table test_truncate") @@ -502,7 +507,9 @@ def test_hdfsCluster(started_cluster): def test_hdfs_directory_not_exist(started_cluster): ddl = "create table HDFSStorageWithNotExistDir (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/data/not_eixst', 'TSV')" node1.query(ddl) - assert "" == node1.query("select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1") + assert "" == node1.query( + "select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1" + ) def test_overwrite(started_cluster): From 34a87666ebe932fbedef68ac7fef05f2a6e5880a Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 28 Mar 2024 16:55:39 +0100 Subject: [PATCH 073/651] Update settings changes history --- src/Core/SettingsChangesHistory.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index db6fb2f1c0e..8cde00fcc14 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -124,6 +124,9 @@ static std::map sett {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS"}, + {"azure_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageAzureBlob"}, + {"s3_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageS3"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, From 422a3bd672d8c3f7f5bc050eaeca14415a013a60 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 3 Apr 2024 17:16:51 +0200 Subject: [PATCH 074/651] Update version in SettingsChangesHistory.h --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 9fa1a71f58e..0b90d0216bf 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,8 +85,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.4", {{"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}}}, {"24.3", 
{{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, From b24a2afd5fb6c44fd1ecd2435963f3433c61f2af Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 13:21:22 +0200 Subject: [PATCH 075/651] A few more test fixes --- src/TableFunctions/TableFunctionObjectStorageCluster.cpp | 5 +++-- src/TableFunctions/TableFunctionObjectStorageCluster.h | 8 ++++++++ tests/queries/0_stateless/02725_database_hdfs.sh | 6 ++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 4ec94cfaf7c..909ace788eb 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -17,9 +17,8 @@ namespace DB template StoragePtr TableFunctionObjectStorageCluster::executeImpl( const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const { - using Base = TableFunctionObjectStorage; auto configuration = Base::getConfiguration(); ColumnsDescription columns; @@ -27,6 +26,8 @@ StoragePtr TableFunctionObjectStorageClusterstructure, context); else if (!Base::structure_hint.empty()) columns = Base::structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; auto object_storage = Base::getObjectStorage(context, !is_insert_query); StoragePtr storage; diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h index 461456e37df..21c2f8995dc 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.h +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -67,6 +67,8 @@ public: String getSignature() const override { return signature; } protected: + using Base = TableFunctionObjectStorage; + StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, @@ -75,6 +77,12 @@ protected: bool is_insert_query) const override; const char * getStorageTypeName() const override { return Definition::storage_type_name; } + + bool hasStaticStructure() const override { return Base::getConfiguration()->structure != "auto"; } + + bool needStructureHint() const override { return Base::getConfiguration()->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { Base::structure_hint = structure_hint_; } }; #if USE_AWS_S3 diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index 623af707542..1eb22976b84 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -58,10 +58,8 @@ SELECT * FROM \"abacaba/file.tsv\" """ 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || 
echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "The data format cannot be detected" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "The table structure cannot be extracted" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: From aa804e744b1f1c233ef7158431feb4c016d0026c Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 14:05:50 +0200 Subject: [PATCH 076/651] Fix style check --- src/Storages/ObjectStorage/HDFS/Configuration.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 5a4fb322692..0a49ba5e251 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -153,7 +153,7 @@ void StorageHDFSConfiguration::addStructureToArgs(ASTs & args, const String & st { size_t count = args.size(); if (count == 0 || count > 3) - throw Exception(ErrorCodes::LOGICAL_ERROR, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected 1 to 3 arguments in table function, got {}", count); auto structure_literal = std::make_shared(structure_); From e5ffe3cf8d7362335ef6150e7864d5deb74c9479 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 5 Apr 2024 16:15:11 +0200 Subject: [PATCH 077/651] More tests fixes --- src/Storages/MergeTree/KeyCondition.cpp | 7 +++++ .../ObjectStorage/AzureBlob/Configuration.cpp | 3 +- .../ObjectStorage/AzureBlob/Configuration.h | 4 ++- .../ObjectStorage/HDFS/Configuration.cpp | 28 +++++++++++++------ .../ObjectStorage/HDFS/Configuration.h | 4 ++- .../ReadFromStorageObjectStorage.cpp | 3 +- .../ReadFromStorageObjectStorage.h | 1 + .../ObjectStorage/S3/Configuration.cpp | 4 ++- src/Storages/ObjectStorage/S3/Configuration.h | 4 ++- .../ObjectStorage/StorageObjectStorage.cpp | 3 ++ .../StorageObjectStorageConfiguration.h | 1 + .../StorageObjectStorageSource.cpp | 16 ++++++++++- .../StorageObjectStorageSource.h | 2 +- .../TableFunctionObjectStorage.cpp | 4 +-- 14 files changed, 65 insertions(+), 19 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 2d57ea40c9c..a720e243fdb 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -2661,6 +2661,13 @@ BoolMask KeyCondition::checkInHyperrectangle( else if (element.function == RPNElement::FUNCTION_IN_RANGE || element.function == 
RPNElement::FUNCTION_NOT_IN_RANGE)
         {
+            if (element.key_column >= hyperrectangle.size())
+            {
+                throw Exception(ErrorCodes::LOGICAL_ERROR,
+                    "Hyperrectangle size is {}, but requested element at position {} ({})",
+                    hyperrectangle.size(), element.key_column, element.toString());
+            }
+
             const Range * key_range = &hyperrectangle[element.key_column];
 
             /// The case when the column is wrapped in a chain of possibly monotonic functions.
diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp
index 018cec51e7c..fe01251e58a 100644
--- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp
+++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp
@@ -379,7 +379,8 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte
     blobs_paths = {blob_path};
 }
 
-void StorageAzureBlobConfiguration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context)
+void StorageAzureBlobConfiguration::addStructureAndFormatToArgs(
+    ASTs & args, const String & structure_, const String & /* format */, ContextPtr context)
 {
     if (tryGetNamedCollectionWithOverrides(args, context))
     {
diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h
index 8040d433d99..c12ff81197d 100644
--- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h
+++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h
@@ -26,6 +26,7 @@ public:
 
     const Paths & getPaths() const override { return blobs_paths; }
     Paths & getPaths() override { return blobs_paths; }
+    void setPaths(const Paths & paths) override { blobs_paths = paths; }
 
     String getDataSourceDescription() override { return fs::path(connection_url) / container; }
     String getNamespace() const override { return container; }
@@ -36,7 +37,8 @@ public:
     void fromNamedCollection(const NamedCollection & collection) override;
     void fromAST(ASTs & args, ContextPtr context, bool with_structure) override;
 
-    static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context);
+    static void addStructureAndFormatToArgs(
+        ASTs & args, const String & structure_, const String & format_, ContextPtr context);
 
 protected:
     using AzureClient = Azure::Storage::Blobs::BlobContainerClient;
diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp
index 0a49ba5e251..220857fead6 100644
--- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp
+++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp
@@ -139,7 +139,11 @@ void StorageHDFSConfiguration::setURL(const std::string & url_)
     LOG_TRACE(getLogger("StorageHDFSConfiguration"), "Using url: {}, path: {}", url, path);
 }
 
-void StorageHDFSConfiguration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context)
+void StorageHDFSConfiguration::addStructureAndFormatToArgs(
+    ASTs & args,
+    const String & structure_,
+    const String & format_,
+    ContextPtr context)
 {
     if (tryGetNamedCollectionWithOverrides(args, context))
     {
@@ -152,10 +156,13 @@ void StorageHDFSConfiguration::addStructureToArgs(ASTs & args, const String & st
     else
     {
         size_t count = args.size();
-        if (count == 0 || count > 3)
+        if (count == 0 || count > 4)
+        {
             throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                "Expected 1 to 3 arguments in table function, got {}", count);
+                "Expected 1 to 4 arguments in table function, got {}", count);
+        }
 
+        auto format_literal = std::make_shared(format_);
         auto structure_literal = std::make_shared(structure_);
 
         /// hdfs(url)
@@ -168,15 +175,18 @@ void StorageHDFSConfiguration::addStructureToArgs(ASTs & args, const String & st
         /// hdfs(url, format)
         else if (count == 2)
         {
+            if (checkAndGetLiteralArgument(args[1], "format") == "auto")
+                args.back() = format_literal;
             args.push_back(structure_literal);
         }
-        /// hdfs(url, format, compression_method)
-        else if (count == 3)
+        /// hdfs(url, format, structure)
+        /// hdfs(url, format, structure, compression_method)
+        else if (count >= 3)
         {
-            auto compression_method = args.back();
-            args.pop_back();
-            args.push_back(structure_literal);
-            args.push_back(compression_method);
+            if (checkAndGetLiteralArgument(args[1], "format") == "auto")
+                args[1] = format_literal;
+            if (checkAndGetLiteralArgument(args[2], "structure") == "auto")
+                args[2] = structure_literal;
         }
     }
 }
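For reference, the argument forms rewritten by the branches above correspond to these hdfs() table function calls (a sketch; the URL and schema are illustrative), where an 'auto' format or structure literal gets substituted in place:

SELECT * FROM hdfs('hdfs://hdfs1:9000/data/*.tsv');
SELECT * FROM hdfs('hdfs://hdfs1:9000/data/*.tsv', 'TSV');
SELECT * FROM hdfs('hdfs://hdfs1:9000/data/*.tsv', 'TSV', 'id UInt32');
SELECT * FROM hdfs('hdfs://hdfs1:9000/data/*.tsv', 'TSV', 'id UInt32', 'gzip');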
diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h
index 7dc1f8073c1..23a7e8e4549 100644
--- a/src/Storages/ObjectStorage/HDFS/Configuration.h
+++ b/src/Storages/ObjectStorage/HDFS/Configuration.h
@@ -21,6 +21,7 @@ public:
 
     const Paths & getPaths() const override { return paths; }
     Paths & getPaths() override { return paths; }
+    void setPaths(const Paths & paths_) override { paths = paths_; }
 
     String getNamespace() const override { return ""; }
     String getDataSourceDescription() override { return url; }
@@ -29,7 +30,8 @@ public:
     ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT
     StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); }
 
-    static void addStructureToArgs(ASTs &, const String &, ContextPtr);
+    static void addStructureAndFormatToArgs(
+        ASTs & args, const String & structure_, const String & format_, ContextPtr context);
 
     std::string getPathWithoutGlob() const override;
 
diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp
index f2595299430..89d33191f41 100644
--- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp
+++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp
@@ -9,6 +9,7 @@ ReadFromStorageObejctStorage::ReadFromStorageObejctStorage(
     ObjectStoragePtr object_storage_,
     ConfigurationPtr configuration_,
     const String & name_,
+    const Names & columns_to_read,
     const NamesAndTypesList & virtual_columns_,
     const SelectQueryInfo & query_info_,
     const StorageSnapshotPtr & storage_snapshot_,
@@ -24,7 +25,7 @@ ReadFromStorageObejctStorage::ReadFromStorageObejctStorage(
     CurrentMetrics::Metric metric_threads_count_,
     CurrentMetrics::Metric metric_threads_active_,
     CurrentMetrics::Metric metric_threads_scheduled_)
-    : SourceStepWithFilter(DataStream{.header = info_.source_header}, info_.requested_columns.getNames(), query_info_, storage_snapshot_, context_)
+    : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_)
     , object_storage(object_storage_)
     , configuration(configuration_)
     , info(std::move(info_))
diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h
index 44b992f8c12..c0dd02d75f8 100644
--- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h
+++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h
@@ -15,6 +15,7 @@ public:
         ObjectStoragePtr object_storage_,
         ConfigurationPtr configuration_,
         const String & name_,
+        const Names & columns_to_read,
        const NamesAndTypesList & 
virtual_columns_, const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 132a5045d8a..f532af24017 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -330,7 +330,8 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_ keys = {url.key}; } -void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) +void StorageS3Configuration::addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) { if (tryGetNamedCollectionWithOverrides(args, context)) { @@ -348,6 +349,7 @@ void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & stru if (count == 0 || count > 6) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to 6 arguments in table function, got {}", count); + auto format_literal = std::make_shared(format_); auto structure_literal = std::make_shared(structure_); /// s3(s3_url) diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index f9614da4b95..ff5e8680e66 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -22,6 +22,7 @@ public: const Paths & getPaths() const override { return keys; } Paths & getPaths() override { return keys; } + void setPaths(const Paths & paths) override { keys = paths; } String getNamespace() const override { return url.bucket; } String getDataSourceDescription() override; @@ -33,7 +34,8 @@ public: bool isStaticConfiguration() const override { return static_configuration; } ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT - static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context); + static void addStructureAndFormatToArgs( + ASTs & args, const String & structure, const String & format, ContextPtr context); private: void fromNamedCollection(const NamedCollection & collection) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 84810c117c9..8fc3de4de1b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -158,10 +158,13 @@ void StorageObjectStorage::read( const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII SOURCE HEADER: {}", read_from_format_info.source_header.dumpStructure()); + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII FORMAT HEADER: {}", read_from_format_info.format_header.dumpStructure()); auto read_step = std::make_unique( object_storage, configuration, getName(), + column_names, getVirtualsList(), query_info, storage_snapshot, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 48825c6a012..647575aaa90 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -32,6 +32,7 @@ public: virtual const Paths & getPaths() const = 0; virtual Paths & getPaths() = 0; + virtual void setPaths(const Paths 
& paths) = 0;
 
     virtual String getDataSourceDescription() = 0;
     virtual String getNamespace() const = 0;
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
index fd3ac58b1a2..30316af987c 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
@@ -106,8 +106,21 @@ std::shared_ptr StorageObjectStorageSourc
     }
     else
     {
+        ConfigurationPtr copy_configuration = configuration->clone();
+        auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        if (filter_dag)
+        {
+            auto keys = configuration->getPaths();
+            std::vector paths;
+            paths.reserve(keys.size());
+            for (const auto & key : keys)
+                paths.push_back(fs::path(configuration->getNamespace()) / key);
+            VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context);
+            copy_configuration->setPaths(keys);
+        }
+
         return std::make_shared(
-            object_storage, configuration, virtual_columns, read_keys,
+            object_storage, copy_configuration, virtual_columns, read_keys,
             settings.ignore_non_existent_file, file_progress_callback);
    }
}
@@ -247,6 +260,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
     const auto max_parsing_threads = need_only_count ? std::optional(1) : std::nullopt;
     read_buf = createReadBuffer(object_info->relative_path, object_info->metadata->size_bytes);
 
+    LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII HEADER: {}", read_from_format_info.format_header.dumpStructure());
     auto input_format = FormatFactory::instance().getInput(
         configuration->format, *read_buf, read_from_format_info.format_header,
         getContext(), max_block_size, format_settings, max_parsing_threads,
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
index 3d4cc4fbd20..28962aadecd 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
@@ -45,7 +45,7 @@ public:
 
     void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override
     {
-        setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.source_header);
+        setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header);
     }
 
     Chunk generate() override;
diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp
index d407017d5f7..9223642a7e6 100644
--- a/src/TableFunctions/TableFunctionObjectStorage.cpp
+++ b/src/TableFunctions/TableFunctionObjectStorage.cpp
@@ -65,9 +65,9 @@ std::vector TableFunctionObjectStorage<
 
 template
 void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded(
-    ASTs & args, const String & structure, const String & /* format */, const ContextPtr & context)
+    ASTs & args, const String & structure, const String & format, const ContextPtr & context)
 {
-    Configuration::addStructureToArgs(args, structure, context);
+    Configuration::addStructureAndFormatToArgs(args, structure, format, context);
 }
 
 template
From e8f02af78c418f7c0a521bd48d49fcfb91db455f Mon Sep 17 00:00:00 2001
From: Duc Canh Le
Date: Tue, 9 Apr 2024 09:49:32 +0000
Subject: [PATCH 078/651] fix part splitter wrongly adding ranges with
 undefined end mark value to the non-intersecting part

Signed-off-by: Duc Canh Le
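Previously, when exactly one range with an undefined end mark remained, it was classified as non-intersecting even though its begin value may lie below the end values of earlier ranges, so a FINAL read could skip merging rows that share a key. The regression test added below pins the behaviour down with a query of this shape (taken from the test; the id literal comes from its fixed random seed):

SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955
SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1;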
---
 src/Processors/QueryPlan/PartsSplitter.cpp    | 11 ++++------
 .../03033_final_undefined_last_mark.reference |  1 +
 .../03033_final_undefined_last_mark.sql       | 21 +++++++++++++++++++
 3 files changed, 26 insertions(+), 7 deletions(-)
 create mode 100644 tests/queries/0_stateless/03033_final_undefined_last_mark.reference
 create mode 100644 tests/queries/0_stateless/03033_final_undefined_last_mark.sql

diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp
index 2af1bcb0260..d3425bce2a3 100644
--- a/src/Processors/QueryPlan/PartsSplitter.cpp
+++ b/src/Processors/QueryPlan/PartsSplitter.cpp
@@ -609,14 +609,11 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts,
     }
 
     /// Process parts ranges with undefined value at end mark
-    bool is_intersecting = part_index_start_to_range.size() > 1;
+    /// The last parts ranges could be non-intersecting only if: (1) there is only one part range left, (2) it belongs to a non-L0 part,
+    /// and (3) the begin value of this range is larger than the largest end value of all previous ranges. This is too complicated
+    /// to check, so we just add the last part ranges to the intersecting ranges.
     for (const auto & [part_range_index, mark_range] : part_index_start_to_range)
-    {
-        if (is_intersecting)
-            add_intersecting_range(part_range_index.part_index, mark_range);
-        else
-            add_non_intersecting_range(part_range_index.part_index, mark_range);
-    }
+        add_intersecting_range(part_range_index.part_index, mark_range);
 
     auto && non_intersecting_ranges_in_data_parts = std::move(non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts());
     auto && intersecting_ranges_in_data_parts = std::move(intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts());
diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference
new file mode 100644
index 00000000000..7b82946b108
--- /dev/null
+++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference
@@ -0,0 +1 @@
+GOOD 11338881281426660955 14765404159170880511
diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql
new file mode 100644
index 00000000000..183406f803c
--- /dev/null
+++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql
@@ -0,0 +1,21 @@
+-- Tags: no-random-settings, no-random-merge-tree-settings
+
+CREATE TABLE account_test
+(
+    `id` UInt64,
+    `row_ver` UInt64,
+)
+ENGINE = ReplacingMergeTree(row_ver)
+PARTITION BY id % 64
+ORDER BY id
+SETTINGS index_granularity = 512, index_granularity_bytes = 0,
+    min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0,
+    min_rows_for_compact_part = 0, min_bytes_for_compact_part = 0;
+
+INSERT INTO account_test
+    SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 50000;
+
+INSERT INTO account_test
+    SELECT * FROM (SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 1000) WHERE row_ver > 14098131981223776000;
+
+SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1;
From f41d88b990052e06ae7dd87826662d664c4f54e8 Mon Sep 17 00:00:00 2001
From: Duc Canh Le
Date: Wed, 10 Apr 2024 05:43:50 +0000
Subject: [PATCH 079/651] add reference query to test

Signed-off-by: Duc Canh Le
---
 .../0_stateless/03033_final_undefined_last_mark.reference     | 1 +
 tests/queries/0_stateless/03033_final_undefined_last_mark.sql | 2 ++
 2 files changed, 3 insertions(+)

diff --git 
a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference index 7b82946b108..bf0a25f24e4 100644 --- a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference @@ -1 +1,2 @@ GOOD 11338881281426660955 14765404159170880511 +GOOD 11338881281426660955 14765404159170880511 diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql index 183406f803c..2c13da42ca4 100644 --- a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql @@ -18,4 +18,6 @@ INSERT INTO account_test INSERT INTO account_test SELECT * FROM (SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 1000) WHERE row_ver > 14098131981223776000; +SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 0; SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; + From 14c461338b12719daa1dc044148f914fd6a5fac6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 10 Apr 2024 12:56:29 +0200 Subject: [PATCH 080/651] Replay ZK logs using keeper-bench --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 +- src/Common/ZooKeeper/ZooKeeperImpl.h | 3 +- utils/keeper-bench/CMakeLists.txt | 3 +- utils/keeper-bench/Generator.cpp | 194 +----- utils/keeper-bench/Generator.h | 18 - utils/keeper-bench/Runner.cpp | 821 ++++++++++++++++++++++++- utils/keeper-bench/Runner.h | 77 ++- utils/keeper-bench/main.cpp | 24 +- 8 files changed, 875 insertions(+), 269 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 2185d32e47a..ed7498b1ac9 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1259,11 +1259,13 @@ void ZooKeeper::initFeatureFlags() void ZooKeeper::executeGenericRequest( const ZooKeeperRequestPtr & request, - ResponseCallback callback) + ResponseCallback callback, + WatchCallbackPtr watch) { RequestInfo request_info; request_info.request = request; request_info.callback = callback; + request_info.watch = watch; pushRequest(std::move(request_info)); } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index cf331a03d06..8fdf0f97d9d 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -139,7 +139,8 @@ public: void executeGenericRequest( const ZooKeeperRequestPtr & request, - ResponseCallback callback); + ResponseCallback callback, + WatchCallbackPtr watch = nullptr); /// See the documentation about semantics of these methods in IKeeper class. 
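The watch parameter added to executeGenericRequest above lets keeper-bench re-register watches while replaying logged requests. A minimal calling sketch follows; it is not from the patch itself, the request path and callback bodies are illustrative, and it assumes an established Coordination::ZooKeeper session:

// Sketch: replay a logged Get request and attach a watch, as the original client did.
auto request = std::make_shared<Coordination::ZooKeeperGetRequest>();
request->path = "/replayed/node";

auto watch = std::make_shared<Coordination::WatchCallback>(
    [](const Coordination::WatchResponse &) { /* count triggered watches */ });

zookeeper.executeGenericRequest(
    request,
    [](const Coordination::Response & response)
    {
        if (response.error != Coordination::Error::ZOK)
        {
            /* record the failure in the replay statistics */
        }
    },
    watch);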
diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 5514c34f4ef..4fe0d852fd2 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -4,5 +4,4 @@ if (NOT TARGET ch_contrib::rapidjson) endif () clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE dbms) -target_link_libraries(keeper-bench PRIVATE ch_contrib::rapidjson) +target_link_libraries(keeper-bench PRIVATE dbms clickhouse_functions ch_contrib::rapidjson) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index 2212f7158ae..cbf1bcdae23 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -40,54 +40,6 @@ std::string generateRandomString(size_t length) } } -void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) -{ - namespace fs = std::filesystem; - - auto promise = std::make_shared>(); - auto future = promise->get_future(); - - Strings children; - auto list_callback = [promise, &children] (const ListResponse & response) - { - children = response.names; - - promise->set_value(); - }; - zookeeper.list(path, ListRequestType::ALL, list_callback, nullptr); - future.get(); - - while (!children.empty()) - { - Coordination::Requests ops; - for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i) - { - removeRecursive(zookeeper, fs::path(path) / children.back()); - ops.emplace_back(makeRemoveRequest(fs::path(path) / children.back(), -1)); - children.pop_back(); - } - auto multi_promise = std::make_shared>(); - auto multi_future = multi_promise->get_future(); - - auto multi_callback = [multi_promise] (const MultiResponse &) - { - multi_promise->set_value(); - }; - zookeeper.multi(ops, multi_callback); - multi_future.get(); - } - auto remove_promise = std::make_shared>(); - auto remove_future = remove_promise->get_future(); - - auto remove_callback = [remove_promise] (const RemoveResponse &) - { - remove_promise->set_value(); - }; - - zookeeper.remove(path, -1, remove_callback); - remove_future.get(); -} - NumberGetter NumberGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value) { @@ -603,148 +555,16 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) acl.id = "anyone"; default_acls.emplace_back(std::move(acl)); - static const std::string generator_key = "generator"; - - std::cerr << "---- Parsing setup ---- " << std::endl; - static const std::string setup_key = generator_key + ".setup"; - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(setup_key, keys); - for (const auto & key : keys) - { - if (key.starts_with("node")) - { - auto node_key = setup_key + "." 
+ key; - auto parsed_root_node = parseNode(node_key, config); - const auto node = root_nodes.emplace_back(parsed_root_node); - - if (config.has(node_key + ".repeat")) - { - if (!node->name.isRandom()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); - - auto repeat_count = config.getUInt64(node_key + ".repeat"); - node->repeat_count = repeat_count; - for (size_t i = 1; i < repeat_count; ++i) - root_nodes.emplace_back(node->clone()); - } - - std::cerr << "Tree to create:" << std::endl; - - node->dumpTree(); - std::cerr << std::endl; - } - } - std::cerr << "---- Done parsing data setup ----\n" << std::endl; - std::cerr << "---- Collecting request generators ----" << std::endl; - static const std::string requests_key = generator_key + ".requests"; + static const std::string requests_key = "generator.requests"; request_getter = RequestGetter::fromConfig(requests_key, config); std::cerr << request_getter.description() << std::endl; std::cerr << "---- Done collecting request generators ----\n" << std::endl; } -std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) -{ - auto node = std::make_shared(); - node->name = StringGetter::fromConfig(key + ".name", config); - - if (config.has(key + ".data")) - node->data = StringGetter::fromConfig(key + ".data", config); - - Poco::Util::AbstractConfiguration::Keys node_keys; - config.keys(key, node_keys); - - for (const auto & node_key : node_keys) - { - if (!node_key.starts_with("node")) - continue; - - const auto node_key_string = key + "." + node_key; - auto child_node = parseNode(node_key_string, config); - node->children.push_back(child_node); - - if (config.has(node_key_string + ".repeat")) - { - if (!child_node->name.isRandom()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); - - auto repeat_count = config.getUInt64(node_key_string + ".repeat"); - child_node->repeat_count = repeat_count; - for (size_t i = 1; i < repeat_count; ++i) - node->children.push_back(child_node); - } - } - - return node; -} - -void Generator::Node::dumpTree(int level) const -{ - std::string data_string - = data.has_value() ? fmt::format("{}", data->description()) : "no data"; - - std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; - - std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; - - for (auto it = children.begin(); it != children.end();) - { - const auto & child = *it; - child->dumpTree(level + 1); - std::advance(it, child->repeat_count != 0 ? 
child->repeat_count : 1); - } -} - -std::shared_ptr Generator::Node::clone() const -{ - auto new_node = std::make_shared(); - new_node->name = name; - new_node->data = data; - new_node->repeat_count = repeat_count; - - // don't do deep copy of children because we will do clone only for root nodes - new_node->children = children; - - return new_node; -} - -void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const -{ - auto path = std::filesystem::path(parent_path) / name.getString(); - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path, data ? data->getString() : "", false, false, acls, create_callback); - future.get(); - - for (const auto & child : children) - child->createNode(zookeeper, path, acls); -} - void Generator::startup(Coordination::ZooKeeper & zookeeper) { - std::cerr << "---- Creating test data ----" << std::endl; - for (const auto & node : root_nodes) - { - auto node_name = node->name.getString(); - node->name.setString(node_name); - - std::string root_path = std::filesystem::path("/") / node_name; - std::cerr << "Cleaning up " << root_path << std::endl; - removeRecursive(zookeeper, root_path); - - node->createNode(zookeeper, "/", default_acls); - } - std::cerr << "---- Created test data ----\n" << std::endl; - std::cerr << "---- Initializing generators ----" << std::endl; - request_getter.startup(zookeeper); } @@ -752,15 +572,3 @@ Coordination::ZooKeeperRequestPtr Generator::generate() { return request_getter.getRequestGenerator()->generate(default_acls); } - -void Generator::cleanup(Coordination::ZooKeeper & zookeeper) -{ - std::cerr << "---- Cleaning up test data ----" << std::endl; - for (const auto & node : root_nodes) - { - auto node_name = node->name.getString(); - std::string root_path = std::filesystem::path("/") / node_name; - std::cerr << "Cleaning up " << root_path << std::endl; - removeRecursive(zookeeper, root_path); - } -} diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 5b4c05b2d8b..35dce1a95d9 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -173,27 +173,9 @@ public: void startup(Coordination::ZooKeeper & zookeeper); Coordination::ZooKeeperRequestPtr generate(); - void cleanup(Coordination::ZooKeeper & zookeeper); private: - struct Node - { - StringGetter name; - std::optional data; - std::vector> children; - size_t repeat_count = 0; - - std::shared_ptr clone() const; - - void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; - void dumpTree(int level = 0) const; - }; - - static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); std::uniform_int_distribution request_picker; - std::vector> root_nodes; RequestGetter request_getter; Coordination::ACLs default_acls; }; - -std::optional getGenerator(const std::string & name); diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index a4b579f1f7b..8b111f5adb9 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,14 +1,28 @@ #include "Runner.h" +#include +#include #include +#include 
"Common/ConcurrentBoundedQueue.h" +#include "Common/ZooKeeper/IKeeper.h" +#include "Common/ZooKeeper/ZooKeeperArgs.h" #include "Common/ZooKeeper/ZooKeeperCommon.h" #include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include -#include "IO/ReadBufferFromString.h" +#include "Core/ColumnWithTypeAndName.h" +#include "Core/ColumnsWithTypeAndName.h" +#include "IO/ReadBuffer.h" +#include "IO/ReadBufferFromFile.h" +#include "base/Decimal.h" +#include "base/types.h" +#include #include #include #include +#include +#include +#include namespace CurrentMetrics @@ -22,23 +36,41 @@ namespace DB::ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } Runner::Runner( std::optional concurrency_, const std::string & config_path, + const std::string & input_request_log_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, std::optional continue_on_error_, std::optional max_iterations_) - : info(std::make_shared()) + : input_request_log(input_request_log_) + , info(std::make_shared()) { DB::ConfigProcessor config_processor(config_path, true, false); - auto config = config_processor.loadConfig().configuration; + DB::ConfigurationPtr config = nullptr; + + if (!config_path.empty()) + { + config = config_processor.loadConfig().configuration; + + if (config->has("generator")) + generator.emplace(*config); + } + else + { + if (input_request_log.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both --config and --input_request_log cannot be empty"); + + if (!std::filesystem::exists(input_request_log)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "File on path {} does not exist", input_request_log); + } - generator.emplace(*config); if (!hosts_strings_.empty()) { @@ -57,6 +89,8 @@ Runner::Runner( static constexpr uint64_t DEFAULT_CONCURRENCY = 1; if (concurrency_) concurrency = *concurrency_; + else if (!config) + concurrency = DEFAULT_CONCURRENCY; else concurrency = config->getUInt64("concurrency", DEFAULT_CONCURRENCY); std::cerr << "Concurrency: " << concurrency << std::endl; @@ -64,6 +98,8 @@ Runner::Runner( static constexpr uint64_t DEFAULT_ITERATIONS = 0; if (max_iterations_) max_iterations = *max_iterations_; + else if (!config) + max_iterations = DEFAULT_ITERATIONS; else max_iterations = config->getUInt64("iterations", DEFAULT_ITERATIONS); std::cerr << "Iterations: " << max_iterations << std::endl; @@ -71,6 +107,8 @@ Runner::Runner( static constexpr double DEFAULT_DELAY = 1.0; if (delay_) delay = *delay_; + else if (!config) + delay = DEFAULT_DELAY; else delay = config->getDouble("report_delay", DEFAULT_DELAY); std::cerr << "Report delay: " << delay << std::endl; @@ -78,44 +116,48 @@ Runner::Runner( static constexpr double DEFAULT_TIME_LIMIT = 0.0; if (max_time_) max_time = *max_time_; + else if (!config) + max_time = DEFAULT_TIME_LIMIT; else max_time = config->getDouble("timelimit", DEFAULT_TIME_LIMIT); std::cerr << "Time limit: " << max_time << std::endl; if (continue_on_error_) continue_on_error = *continue_on_error_; + else if (!config) + continue_on_error_ = false; else continue_on_error = config->getBool("continue_on_error", false); std::cerr << "Continue on error: " << continue_on_error << std::endl; - static const std::string output_key = "output"; - print_to_stdout = config->getBool(output_key + ".stdout", false); - std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; - - static const std::string output_file_key = output_key + ".file"; - if 
(config->has(output_file_key)) + if (config) { - if (config->has(output_file_key + ".path")) - { - file_output = config->getString(output_file_key + ".path"); - output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); - } - else - file_output = config->getString(output_file_key); + benchmark_context.initializeFromConfig(*config); - std::cerr << "Result file path: " << file_output->string() << std::endl; + static const std::string output_key = "output"; + print_to_stdout = config->getBool(output_key + ".stdout", false); + std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; + + static const std::string output_file_key = output_key + ".file"; + if (config->has(output_file_key)) + { + if (config->has(output_file_key + ".path")) + { + file_output = config->getString(output_file_key + ".path"); + output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); + } + else + file_output = config->getString(output_file_key); + + std::cerr << "Result file path: " << file_output->string() << std::endl; + } } std::cerr << "---- Run options ----\n" << std::endl; - - pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); - queue.emplace(concurrency); } void Runner::parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config) { - ConnectionInfo default_connection_info; - const auto fill_connection_details = [&](const std::string & key, auto & connection_info) { if (config.has(key + ".secure")) @@ -328,9 +370,519 @@ bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && re void Runner::runBenchmark() { + if (generator) + runBenchmarkWithGenerator(); + else + runBenchmarkFromLog(); +} + + +struct ZooKeeperRequestBlock +{ + explicit ZooKeeperRequestBlock(DB::Block block_) + : block(std::move(block_)) + , hostname_idx(block.getPositionByName("hostname")) // + , request_event_time_idx(block.getPositionByName("request_event_time")) // + , thread_id_idx(block.getPositionByName("thread_id")) // + , session_id_idx(block.getPositionByName("session_id")) // + , xid_idx(block.getPositionByName("xid")) // + , has_watch_idx(block.getPositionByName("has_watch")) + , op_num_idx(block.getPositionByName("op_num")) + , path_idx(block.getPositionByName("path")) + , data_idx(block.getPositionByName("data")) + , is_ephemeral_idx(block.getPositionByName("is_ephemeral")) + , is_sequential_idx(block.getPositionByName("is_sequential")) + , response_event_time_idx(block.getPositionByName("response_event_time")) // + , error_idx(block.getPositionByName("error")) + , requests_size_idx(block.getPositionByName("requests_size")) + , version_idx(block.getPositionByName("version")) + {} + + size_t rows() const + { + return block.rows(); + } + + UInt64 getExecutorId(size_t row) const + { + return getSessionId(row); + } + + std::string getHostname(size_t row) const + { + return getField(hostname_idx, row).safeGet(); + } + + UInt64 getThreadId(size_t row) const + { + return getField(thread_id_idx, row).safeGet(); + } + + DB::DateTime64 getRequestEventTime(size_t row) const + { + return getField(request_event_time_idx, row).safeGet(); + } + + DB::DateTime64 getResponseEventTime(size_t row) const + { + return getField(response_event_time_idx, row).safeGet(); + } + + Int64 getSessionId(size_t row) const + { + return getField(session_id_idx, row).safeGet(); + } + + Int64 getXid(size_t row) const + { + return getField(xid_idx, row).safeGet(); + } + + bool hasWatch(size_t 
row) const + { + return getField(has_watch_idx, row).safeGet(); + } + + Coordination::OpNum getOpNum(size_t row) const + { + return static_cast(getField(op_num_idx, row).safeGet()); + } + + bool isEphemeral(size_t row) const + { + return getField(is_ephemeral_idx, row).safeGet(); + } + + bool isSequential(size_t row) const + { + return getField(is_sequential_idx, row).safeGet(); + } + + std::string getPath(size_t row) const + { + return getField(path_idx, row).safeGet(); + } + + std::string getData(size_t row) const + { + return getField(data_idx, row).safeGet(); + } + + UInt64 getRequestsSize(size_t row) const + { + return getField(requests_size_idx, row).safeGet(); + } + + std::optional getVersion(size_t row) const + { + auto field = getField(version_idx, row); + if (field.isNull()) + return std::nullopt; + return static_cast(field.safeGet()); + } + + std::optional getError(size_t row) const + { + auto field = getField(error_idx, row); + if (field.isNull()) + return std::nullopt; + + return static_cast(field.safeGet()); + } +private: + DB::Field getField(size_t position, size_t row) const + { + DB::Field field; + block.getByPosition(position).column->get(row, field); + return field; + } + + DB::Block block; + size_t hostname_idx = 0; + size_t request_event_time_idx = 0; + size_t thread_id_idx = 0; + size_t session_id_idx = 0; + size_t xid_idx = 0; + size_t has_watch_idx = 0; + size_t op_num_idx = 0; + size_t path_idx = 0; + size_t data_idx = 0; + size_t is_ephemeral_idx = 0; + size_t is_sequential_idx = 0; + size_t response_event_time_idx = 0; + size_t error_idx = 0; + size_t requests_size_idx = 0; + size_t version_idx = 0; +}; + +struct RequestFromLog +{ + Coordination::ZooKeeperRequestPtr request; + std::optional expected_result; + int64_t session_id = 0; + size_t executor_id = 0; + bool has_watch = false; + DB::DateTime64 request_event_time; + DB::DateTime64 response_event_time; + std::shared_ptr connection; +}; + +struct ZooKeeperRequestFromLogReader +{ + ZooKeeperRequestFromLogReader(const std::string & input_request_log, DB::ContextPtr context) + { + std::optional format_settings; + + file_read_buf = std::make_unique(input_request_log); + auto compression_method = DB::chooseCompressionMethod(input_request_log, ""); + file_read_buf = DB::wrapReadBufferWithCompressionMethod(std::move(file_read_buf), compression_method); + + DB::SingleReadBufferIterator read_buffer_iterator(std::move(file_read_buf)); + auto [columns_description, format] = DB::detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + + DB::ColumnsWithTypeAndName columns; + columns.reserve(columns_description.size()); + + for (const auto & column_description : columns_description) + columns.push_back(DB::ColumnWithTypeAndName{column_description.type, column_description.name}); + + header_block = std::move(columns); + + file_read_buf + = DB::wrapReadBufferWithCompressionMethod(std::make_unique(input_request_log), compression_method); + + input_format = DB::FormatFactory::instance().getInput( + format, + *file_read_buf, + header_block, + context, + context->getSettingsRef().max_block_size, + format_settings, + 1, + std::nullopt, + /*is_remote_fs*/ false, + DB::CompressionMethod::None, + false); + + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + } + + std::optional getNextRequest(bool for_multi = false) + { + RequestFromLog request_from_log; + + if (!current_block) + { + auto chunk = 
input_format->generate(); + + if (chunk.empty()) + return std::nullopt; + + current_block.emplace(header_block.cloneWithColumns(chunk.detachColumns())); + idx_in_block = 0; + } + + + request_from_log.expected_result = current_block->getError(idx_in_block); + request_from_log.session_id = current_block->getSessionId(idx_in_block); + request_from_log.has_watch = current_block->hasWatch(idx_in_block); + request_from_log.executor_id = current_block->getExecutorId(idx_in_block); + request_from_log.request_event_time = current_block->getRequestEventTime(idx_in_block); + request_from_log.response_event_time = current_block->getResponseEventTime(idx_in_block); + + const auto move_row_iterator = [&] + { + if (idx_in_block == current_block->rows() - 1) + current_block.reset(); + else + ++idx_in_block; + }; + + auto op_num = current_block->getOpNum(idx_in_block); + switch (op_num) + { + case Coordination::OpNum::Create: + { + auto create_request = std::make_shared(); + create_request->path = current_block->getPath(idx_in_block); + create_request->data = current_block->getData(idx_in_block); + create_request->is_ephemeral = current_block->isEphemeral(idx_in_block); + create_request->is_sequential = current_block->isSequential(idx_in_block); + request_from_log.request = create_request; + break; + } + case Coordination::OpNum::Set: + { + auto set_request = std::make_shared(); + set_request->path = current_block->getPath(idx_in_block); + set_request->data = current_block->getData(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + set_request->version = *version; + request_from_log.request = set_request; + break; + } + case Coordination::OpNum::Remove: + { + auto remove_request = std::make_shared(); + remove_request->path = current_block->getPath(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + remove_request->version = *version; + request_from_log.request = remove_request; + break; + } + case Coordination::OpNum::Check: + { + auto check_request = std::make_shared(); + check_request->path = current_block->getPath(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + check_request->version = *version; + request_from_log.request = check_request; + break; + } + case Coordination::OpNum::Sync: + { + auto sync_request = std::make_shared(); + sync_request->path = current_block->getPath(idx_in_block); + request_from_log.request = sync_request; + break; + } + case Coordination::OpNum::Get: + { + auto get_request = std::make_shared(); + get_request->path = current_block->getPath(idx_in_block); + request_from_log.request = get_request; + break; + } + case Coordination::OpNum::SimpleList: + case Coordination::OpNum::FilteredList: + { + auto list_request = std::make_shared(); + list_request->path = current_block->getPath(idx_in_block); + request_from_log.request = list_request; + break; + } + case Coordination::OpNum::Exists: + { + auto exists_request = std::make_shared(); + exists_request->path = current_block->getPath(idx_in_block); + request_from_log.request = exists_request; + break; + } + case Coordination::OpNum::Multi: + case Coordination::OpNum::MultiRead: + { + if (for_multi) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Nested multi requests are not allowed"); + + auto requests_size = current_block->getRequestsSize(idx_in_block); + + Coordination::Requests requests; + requests.reserve(requests_size); + move_row_iterator(); + + for (size_t i = 0; i < requests_size; ++i) + { + auto subrequest_from_log = 
getNextRequest(/*for_multi=*/true); + if (!subrequest_from_log) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to fetch subrequest for {}, subrequest index {}", op_num, i); + + requests.push_back(std::move(subrequest_from_log->request)); + + if (subrequest_from_log->session_id != request_from_log.session_id) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Session id mismatch for subrequest in {}, subrequest index {}", op_num, i); + + if (subrequest_from_log->executor_id != request_from_log.executor_id) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Executor id mismatch for subrequest in {}, subrequest index {}", op_num, i); + } + + request_from_log.request = std::make_shared(requests, default_acls); + + return request_from_log; + } + default: + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unsupported operation {} ({})", op_num, static_cast(op_num)); + } + + move_row_iterator(); + + return request_from_log; + } + +private: + DB::Block header_block; + + std::unique_ptr file_read_buf; + DB::InputFormatPtr input_format; + + std::optional current_block; + size_t idx_in_block = 0; + + Coordination::ACLs default_acls; +}; + + +namespace +{ + + +struct RequestFromLogStats +{ + struct Stats + { + std::atomic total = 0; + std::atomic unexpected_results = 0; + }; + + Stats write_requests; + Stats read_requests; +}; + +void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_for_type) +{ + std::cerr << fmt::format( + "{} requests: {} total, {} with unexpected results ({:.4}%)", + type, + stats_for_type.total, + stats_for_type.unexpected_results, + static_cast(stats_for_type.unexpected_results) / stats_for_type.total * 100) + << std::endl; +}; + +void requestFromLogExecutor(std::shared_ptr> queue, RequestFromLogStats & request_stats) +{ + RequestFromLog request_from_log; + std::optional> last_request; + while (queue->pop(request_from_log)) + { + auto request_promise = std::make_shared>(); + last_request = request_promise->get_future(); + Coordination::ResponseCallback callback + = [&, request_promise, request = request_from_log.request, expected_result = request_from_log.expected_result]( + const Coordination::Response & response) mutable + { + auto & stats = request->isReadRequest() ? 
request_stats.read_requests : request_stats.write_requests;
+
+            stats.total.fetch_add(1, std::memory_order_relaxed);
+
+            if (expected_result && *expected_result != response.error)
+                stats.unexpected_results.fetch_add(1, std::memory_order_relaxed);
+
+            //if (!expected_result)
+            //    return;
+
+            //if (*expected_result != response.error)
+            //    std::cerr << fmt::format(
+            //        "Unexpected result for {}, got {}, expected {}", request->getOpNum(), response.error, *expected_result)
+            //        << std::endl;
+
+            request_promise->set_value();
+        };
+
+        Coordination::WatchCallbackPtr watch;
+        if (request_from_log.has_watch)
+            watch = std::make_shared<Coordination::WatchCallback>([](const Coordination::WatchResponse &) {});
+
+        request_from_log.connection->executeGenericRequest(request_from_log.request, callback, watch);
+    }
+
+    if (last_request)
+        last_request->wait();
+}
+
+}
+
+void Runner::runBenchmarkFromLog()
+{
+    std::cerr << fmt::format("Running benchmark using requests from {}", input_request_log) << std::endl;
+
+    pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency);
+
+    shared_context = DB::Context::createShared();
+    global_context = DB::Context::createGlobal(shared_context.get());
+    global_context->makeGlobalContext();
+    DB::registerFormats();
+
+    /// Randomly choosing connection index
+    pcg64 rng(randomSeed());
+    std::uniform_int_distribution<size_t> connection_distribution(0, connection_infos.size() - 1);
+
+    std::unordered_map<int64_t, std::shared_ptr<Coordination::ZooKeeper>> zookeeper_connections;
+    auto get_zookeeper_connection = [&](int64_t session_id)
+    {
+        if (auto it = zookeeper_connections.find(session_id); it != zookeeper_connections.end() && !it->second->isExpired())
+            return it->second;
+
+        auto connection_idx = connection_distribution(rng);
+        auto zk_connection = getConnection(connection_infos[connection_idx], connection_idx);
+        zookeeper_connections.insert_or_assign(session_id, zk_connection);
+        return zk_connection;
+    };
+
+    RequestFromLogStats stats;
+
+    std::unordered_map<uint64_t, std::shared_ptr<ConcurrentBoundedQueue<RequestFromLog>>> executor_id_to_queue;
+
+    SCOPE_EXIT({
+        for (const auto & [executor_id, executor_queue] : executor_id_to_queue)
+            executor_queue->finish();
+
+        pool->wait();
+
+        dumpStats("Write", stats.write_requests);
+        dumpStats("Read", stats.read_requests);
+    });
+
+    auto push_request = [&](RequestFromLog request)
+    {
+        if (auto it = executor_id_to_queue.find(request.executor_id); it != executor_id_to_queue.end())
+        {
+            auto success = it->second->push(std::move(request));
+            if (!success)
+                throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue");
+            return;
+        }
+
+        auto executor_queue = std::make_shared<ConcurrentBoundedQueue<RequestFromLog>>(std::numeric_limits<uint64_t>().max());
+        executor_id_to_queue.emplace(request.executor_id, executor_queue);
+        auto scheduled = pool->trySchedule([&, executor_queue]() mutable
+        {
+            requestFromLogExecutor(std::move(executor_queue), stats);
+        });
+
+        if (!scheduled)
+            throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to schedule worker, try to increase concurrency parameter");
+
+        auto success = executor_queue->push(std::move(request));
+        if (!success)
+            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue");
+    };
+
+    {
+        auto setup_connection = getConnection(connection_infos[0], 0);
+        benchmark_context.startup(*setup_connection);
+    }
+
+    ZooKeeperRequestFromLogReader request_reader(input_request_log, global_context);
+    while (auto request_from_log = request_reader.getNextRequest())
+    {
+        request_from_log->connection = get_zookeeper_connection(request_from_log->session_id);
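+
+        // Requests are routed to a per-session executor queue (getExecutorId() returns
+        // the session id), so each session replays its requests in the original order
+        // while different sessions run concurrently on the thread pool.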
+        push_request(std::move(*request_from_log));
+    }
+}
+
+void Runner::runBenchmarkWithGenerator()
+{
+    pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency);
+    queue.emplace(concurrency);
     createConnections();
 
     std::cerr << "Preparing to run\n";
+    benchmark_context.startup(*connections[0]);
     generator->startup(*connections[0]);
     std::cerr << "Prepared\n";
 
@@ -458,8 +1010,225 @@ std::vector<std::shared_ptr<Coordination::ZooKeeper>> Runner::refreshConnections
 Runner::~Runner()
 {
-    queue->clearAndFinish();
+    if (queue)
+        queue->clearAndFinish();
     shutdown = true;
-    pool->wait();
-    generator->cleanup(*connections[0]);
+
+    if (pool)
+        pool->wait();
+
+    auto connection = getConnection(connection_infos[0], 0);
+    benchmark_context.cleanup(*connection);
+}
+
+namespace
+{
+
+void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path)
+{
+    namespace fs = std::filesystem;
+
+    auto promise = std::make_shared<std::promise<void>>();
+    auto future = promise->get_future();
+
+    Strings children;
+    auto list_callback = [promise, &children] (const Coordination::ListResponse & response)
+    {
+        children = response.names;
+        promise->set_value();
+    };
+    zookeeper.list(path, Coordination::ListRequestType::ALL, list_callback, nullptr);
+    future.get();
+
+    std::span children_span(children);
+    while (!children_span.empty())
+    {
+        Coordination::Requests ops;
+        for (size_t i = 0; i < 1000 && !children_span.empty(); ++i)
+        {
+            removeRecursive(zookeeper, fs::path(path) / children_span.back());
+            ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1));
+            children_span = children_span.subspan(0, children_span.size() - 1);
+        }
+        auto multi_promise = std::make_shared<std::promise<void>>();
+        auto multi_future = multi_promise->get_future();
+
+        auto multi_callback = [multi_promise] (const Coordination::MultiResponse &)
+        {
+            multi_promise->set_value();
+        };
+        zookeeper.multi(ops, multi_callback);
+        multi_future.get();
+    }
+    auto remove_promise = std::make_shared<std::promise<void>>();
+    auto remove_future = remove_promise->get_future();
+
+    auto remove_callback = [remove_promise] (const Coordination::RemoveResponse &)
+    {
+        remove_promise->set_value();
+    };
+
+    zookeeper.remove(path, -1, remove_callback);
+    remove_future.get();
+}
+
+}
+
+void BenchmarkContext::initializeFromConfig(const Poco::Util::AbstractConfiguration & config)
+{
+    Coordination::ACL acl;
+    acl.permissions = Coordination::ACL::All;
+    acl.scheme = "world";
+    acl.id = "anyone";
+    default_acls.emplace_back(std::move(acl));
+
+    std::cerr << "---- Parsing setup ---- " << std::endl;
+    static const std::string setup_key = "setup";
+    Poco::Util::AbstractConfiguration::Keys keys;
+    config.keys(setup_key, keys);
+    for (const auto & key : keys)
+    {
+        if (key.starts_with("node"))
+        {
+            auto node_key = setup_key + "."
+ key; + auto parsed_root_node = parseNode(node_key, config); + const auto node = root_nodes.emplace_back(parsed_root_node); + + if (config.has(node_key + ".repeat")) + { + if (!node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); + + auto repeat_count = config.getUInt64(node_key + ".repeat"); + node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + root_nodes.emplace_back(node->clone()); + } + + std::cerr << "Tree to create:" << std::endl; + + node->dumpTree(); + std::cerr << std::endl; + } + } + std::cerr << "---- Done parsing data setup ----\n" << std::endl; +} + +std::shared_ptr BenchmarkContext::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + auto node = std::make_shared(); + node->name = StringGetter::fromConfig(key + ".name", config); + + if (config.has(key + ".data")) + node->data = StringGetter::fromConfig(key + ".data", config); + + Poco::Util::AbstractConfiguration::Keys node_keys; + config.keys(key, node_keys); + + for (const auto & node_key : node_keys) + { + if (!node_key.starts_with("node")) + continue; + + const auto node_key_string = key + "." + node_key; + auto child_node = parseNode(node_key_string, config); + node->children.push_back(child_node); + + if (config.has(node_key_string + ".repeat")) + { + if (!child_node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); + + auto repeat_count = config.getUInt64(node_key_string + ".repeat"); + child_node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + node->children.push_back(child_node); + } + } + + return node; +} + +void BenchmarkContext::Node::dumpTree(int level) const +{ + std::string data_string + = data.has_value() ? fmt::format("{}", data->description()) : "no data"; + + std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; + + std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; + + for (auto it = children.begin(); it != children.end();) + { + const auto & child = *it; + child->dumpTree(level + 1); + std::advance(it, child->repeat_count != 0 ? child->repeat_count : 1); + } +} + +std::shared_ptr BenchmarkContext::Node::clone() const +{ + auto new_node = std::make_shared(); + new_node->name = name; + new_node->data = data; + new_node->repeat_count = repeat_count; + + // don't do deep copy of children because we will do clone only for root nodes + new_node->children = children; + + return new_node; +} + +void BenchmarkContext::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const +{ + auto path = std::filesystem::path(parent_path) / name.getString(); + auto promise = std::make_shared>(); + auto future = promise->get_future(); + auto create_callback = [promise] (const Coordination::CreateResponse & response) + { + if (response.error != Coordination::Error::ZOK) + promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); + else + promise->set_value(); + }; + zookeeper.create(path, data ? 
data->getString() : "", false, false, acls, create_callback); + future.get(); + + for (const auto & child : children) + child->createNode(zookeeper, path, acls); +} + +void BenchmarkContext::startup(Coordination::ZooKeeper & zookeeper) +{ + if (root_nodes.empty()) + return; + + std::cerr << "---- Creating test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + node->name.setString(node_name); + + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + + node->createNode(zookeeper, "/", default_acls); + } + std::cerr << "---- Created test data ----\n" << std::endl; +} + +void BenchmarkContext::cleanup(Coordination::ZooKeeper & zookeeper) +{ + if (root_nodes.empty()) + return; + + std::cerr << "---- Cleaning up test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + } } diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index 4f4a75e6ecf..0c646eb2166 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -1,5 +1,5 @@ #pragma once -#include "Common/ZooKeeper/ZooKeeperConstants.h" +#include "Common/ZooKeeper/ZooKeeperArgs.h" #include #include "Generator.h" #include @@ -12,6 +12,7 @@ #include #include +#include "Interpreters/Context.h" #include "Stats.h" #include @@ -19,12 +20,40 @@ using Ports = std::vector; using Strings = std::vector; +struct BenchmarkContext +{ +public: + void initializeFromConfig(const Poco::Util::AbstractConfiguration & config); + + void startup(Coordination::ZooKeeper & zookeeper); + void cleanup(Coordination::ZooKeeper & zookeeper); +private: + struct Node + { + StringGetter name; + std::optional data; + std::vector> children; + size_t repeat_count = 0; + + std::shared_ptr clone() const; + + void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; + void dumpTree(int level = 0) const; + }; + + static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::vector> root_nodes; + Coordination::ACLs default_acls; +}; + class Runner { public: Runner( std::optional concurrency_, const std::string & config_path, + const std::string & input_request_log_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, @@ -44,8 +73,30 @@ public: ~Runner(); private: + struct ConnectionInfo + { + std::string host; + + bool secure = false; + int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; + int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; + int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; + bool use_compression = false; + + size_t sessions = 1; + }; + void parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config); + void runBenchmarkWithGenerator(); + void runBenchmarkFromLog(); + + void createConnections(); + std::vector> refreshConnections(); + std::shared_ptr getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx); + + std::string input_request_log; + size_t concurrency = 1; std::optional pool; @@ -54,7 +105,8 @@ private: double max_time = 0; double delay = 1; bool continue_on_error = false; - 
std::atomic max_iterations = 0; + size_t max_iterations = 0; + std::atomic requests_executed = 0; std::atomic shutdown = false; @@ -71,25 +123,14 @@ private: using Queue = ConcurrentBoundedQueue; std::optional queue; - struct ConnectionInfo - { - std::string host; - - bool secure = false; - int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; - int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; - int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; - bool use_compression = false; - - size_t sessions = 1; - }; - std::mutex connection_mutex; + ConnectionInfo default_connection_info; std::vector connection_infos; std::vector> connections; std::unordered_map connections_to_info_map; - void createConnections(); - std::shared_ptr getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx); - std::vector> refreshConnections(); + DB::SharedContextHolder shared_context; + DB::ContextMutablePtr global_context; + + BenchmarkContext benchmark_context; }; diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 0753d66850f..45fc28f3bca 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -1,8 +1,6 @@ #include #include #include "Runner.h" -#include "Stats.h" -#include "Generator.h" #include "Common/Exception.h" #include #include @@ -27,6 +25,10 @@ int main(int argc, char *argv[]) bool print_stacktrace = true; + //Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + //Poco::Logger::root().setChannel(channel); + //Poco::Logger::root().setLevel("trace"); + try { using boost::program_options::value; @@ -34,12 +36,13 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() ("help", "produce help message") - ("config", value()->default_value(""), "yaml/xml file containing configuration") - ("concurrency,c", value(), "number of parallel queries") - ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") - ("iterations,i", value(), "amount of queries to be executed") - ("time-limit,t", value(), "stop launch of queries after specified time limit") - ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") + ("config", value()->default_value(""), "yaml/xml file containing configuration") + ("input-request-log", value()->default_value(""), "log of requests that will be replayed") + ("concurrency,c", value(), "number of parallel queries") + ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("iterations,i", value(), "amount of queries to be executed") + ("time-limit,t", value(), "stop launch of queries after specified time limit") + ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") ("continue_on_errors", "continue testing even if a query fails") ; @@ -56,6 +59,7 @@ int main(int argc, char *argv[]) Runner runner(valueToOptional(options["concurrency"]), options["config"].as(), + options["input-request-log"].as(), options["hosts"].as(), valueToOptional(options["time-limit"]), valueToOptional(options["report-delay"]), @@ -66,9 +70,9 @@ int main(int argc, char *argv[]) { runner.runBenchmark(); } - catch (const DB::Exception & e) + catch (...) 
     {
-        std::cout << "Got exception while trying to run benchmark: " << e.message() << std::endl;
+        std::cout << "Got exception while trying to run benchmark: " << DB::getCurrentExceptionMessage(true) << std::endl;
     }
 
     return 0;
 
From 652796acd6a10515e862260d18e002bae27f3c85 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Mon, 15 Apr 2024 16:37:38 +0100
Subject: [PATCH 081/651] Fix MergeTree with HDFS

---
 .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 38 +++++++++++++++----
 .../ObjectStorages/HDFS/HDFSObjectStorage.h   | 16 ++++++--
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
index 8bfba6fcfad..82c9a6c6c21 100644
--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
@@ -34,15 +34,21 @@ void HDFSObjectStorage::startup()
 ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const
 {
     /// what ever data_source_description.description value is, consider that key as relative key
-    return ObjectStorageKey::createAsRelative(hdfs_root_path, getRandomASCIIString(32));
+    chassert(data_directory.starts_with("/"));
+    return ObjectStorageKey::createAsRelative(
+        fs::path(url_without_path) / data_directory.substr(1), getRandomASCIIString(32));
 }
 
 bool HDFSObjectStorage::exists(const StoredObject & object) const
 {
+    std::string path = object.remote_path;
+    if (path.starts_with(url_without_path))
+        path = path.substr(url_without_path.size());
+
     // const auto & path = object.remote_path;
     // const size_t begin_of_path = path.find('/', path.find("//") + 2);
     // const String remote_fs_object_path = path.substr(begin_of_path);
-    return (0 == hdfsExists(hdfs_fs.get(), object.remote_path.c_str()));
+    return (0 == hdfsExists(hdfs_fs.get(), path.c_str()));
 }
 
 std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObject( /// NOLINT
@@ -51,7 +57,14 @@ std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObject( /// NOLIN
     std::optional<size_t>,
     std::optional<size_t>) const
 {
-    return std::make_unique<ReadBufferFromHDFS>(hdfs_root_path, object.remote_path, config, patchSettings(read_settings));
+    std::string path = object.remote_path;
+    if (path.starts_with(url))
+        path = path.substr(url.size());
+    if (path.starts_with("/"))
+        path = path.substr(1);
+
+    return std::make_unique<ReadBufferFromHDFS>(
+        fs::path(url_without_path) / "", fs::path(data_directory) / path, config, patchSettings(read_settings));
 }
 
 std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObjects( /// NOLINT
@@ -69,8 +82,13 @@
         // auto hdfs_path = path.substr(begin_of_path);
         // auto hdfs_uri = path.substr(0, begin_of_path);
 
+        std::string path = object_.remote_path;
+        if (path.starts_with(url))
+            path = path.substr(url.size());
+        if (path.starts_with("/"))
+            path = path.substr(1);
         return std::make_unique<ReadBufferFromHDFS>(
-            hdfs_root_path, object_.remote_path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true);
+            fs::path(url_without_path) / "", fs::path(data_directory) / path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true);
     };
 
     return std::make_unique<ReadBufferFromRemoteFSGather>(
@@ -89,8 +107,11 @@ std::unique_ptr<WriteBufferFromFileBase> HDFSObjectStorage::writeObject( /// NOL
             ErrorCodes::UNSUPPORTED_METHOD,
             "HDFS API doesn't support custom attributes/metadata for stored objects");
 
-    auto path = object.remote_path.starts_with('/') ?
object.remote_path.substr(1) : object.remote_path; - path = fs::path(hdfs_root_path) / path; + std::string path = object.remote_path; + if (path.starts_with("/")) + path = path.substr(1); + if (!path.starts_with(url)) + path = fs::path(url) / path; /// Single O_WRONLY in libhdfs adds O_TRUNC return std::make_unique( @@ -102,8 +123,9 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL /// Remove file. Throws exception if file doesn't exists or it's a directory. void HDFSObjectStorage::removeObject(const StoredObject & object) { - const auto & path = object.remote_path; - // const size_t begin_of_path = path.find('/', path.find("//") + 2); + auto path = object.remote_path; + if (path.starts_with(url_without_path)) + path = path.substr(url_without_path.size()); /// Add path from root to file name int res = hdfsDelete(hdfs_fs.get(), path.c_str(), 0); diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 24642ec635a..8987fa5eaf1 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -40,15 +40,21 @@ public: , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) - , hdfs_root_path(hdfs_root_path_) { + const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2); + url = hdfs_root_path_; + url_without_path = url.substr(0, begin_of_path); + if (begin_of_path < url.size()) + data_directory = url.substr(begin_of_path); + else + data_directory = "/"; } std::string getName() const override { return "HDFSObjectStorage"; } - std::string getCommonKeyPrefix() const override { return hdfs_root_path; } + std::string getCommonKeyPrefix() const override { return url; } - std::string getDescription() const override { return hdfs_root_path; } + std::string getDescription() const override { return url; } ObjectStorageType getType() const override { return ObjectStorageType::HDFS; } @@ -116,7 +122,9 @@ private: HDFSBuilderWrapper hdfs_builder; HDFSFSPtr hdfs_fs; SettingsPtr settings; - const std::string hdfs_root_path; + std::string url; + std::string url_without_path; + std::string data_directory; }; } From ccee2d668793370c3f947a4be24d1edbabba1724 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 15 Apr 2024 23:28:14 +0100 Subject: [PATCH 082/651] Fix parsing --- src/Storages/ObjectStorage/HDFS/Configuration.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 220857fead6..e12c2f15b28 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -63,9 +63,6 @@ std::string StorageHDFSConfiguration::getPathWithoutGlob() const void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) { - std::string url_str; - url_str = checkAndGetLiteralArgument(args[0], "url"); - const size_t max_args_num = with_structure ? 
4 : 3; if (!args.size() || args.size() > max_args_num) { @@ -73,6 +70,9 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit "Expected not more than {} arguments", max_args_num); } + std::string url_str; + url_str = checkAndGetLiteralArgument(args[0], "url"); + if (args.size() > 1) { args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); From 11be538ac870d231a13a2648038ea1b469f73a08 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 16 Apr 2024 10:20:56 +0100 Subject: [PATCH 083/651] Fix several tests --- src/Disks/ObjectStorages/S3/diskSettings.cpp | 8 +++++-- src/Disks/ObjectStorages/S3/diskSettings.h | 3 ++- .../ObjectStorage/AzureBlob/Configuration.cpp | 7 +++--- .../ObjectStorage/HDFS/Configuration.cpp | 2 +- .../ObjectStorage/S3/Configuration.cpp | 2 +- .../StorageObjectStorageSink.cpp | 3 +-- src/Storages/S3Queue/S3QueueSource.cpp | 14 ++++++++++++ src/Storages/S3Queue/S3QueueSource.h | 1 + src/Storages/StorageS3Settings.cpp | 22 +++++++++++-------- src/Storages/StorageS3Settings.h | 10 +++++---- 10 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 9bd4bf699e8..2bca7df7db9 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -36,11 +36,15 @@ extern const int NO_ELEMENTS_IN_CONFIG; } std::unique_ptr getSettings( - const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context, + bool validate_settings) { const Settings & settings = context->getSettingsRef(); - auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_"); + auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_", validate_settings); auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); + return std::make_unique( request_settings, auth_settings, diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index 5b655f35508..11ac64ce913 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -17,7 +17,8 @@ struct S3ObjectStorageSettings; std::unique_ptr getSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context); + ContextPtr context, + bool validate_settings = true); std::unique_ptr getClient( const Poco::Util::AbstractConfiguration & config, diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index fe01251e58a..44ace9c3b65 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -282,12 +282,11 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte auto is_format_arg = [] (const std::string & s) -> bool { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); + return s == "auto" || FormatFactory::instance().getAllFormats().contains(Poco::toLower(s)); }; if (engine_args.size() == 4) { - //'c1 UInt64, c2 UInt64 auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); if (is_format_arg(fourth_arg)) { @@ -298,7 +297,9 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte if 
(with_structure) structure = fourth_arg; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Unknown format or account name specified without account key: {}", fourth_arg); } } else if (engine_args.size() == 5) diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index e12c2f15b28..af191070329 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -64,7 +64,7 @@ std::string StorageHDFSConfiguration::getPathWithoutGlob() const void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) { const size_t max_args_num = with_structure ? 4 : 3; - if (!args.size() || args.size() > max_args_num) + if (args.empty() || args.size() > max_args_num) { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected not more than {} arguments", max_args_num); diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index f532af24017..46be0a01862 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -77,7 +77,7 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, const auto & config = context->getConfigRef(); const std::string config_prefix = "s3."; - auto s3_settings = getSettings(config, config_prefix, context); + auto s3_settings = getSettings(config, config_prefix, context, false); /// FIXME: add a setting auth_settings.updateFrom(s3_settings->auth_settings); s3_settings->auth_settings = auth_settings; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index cf1c583ca62..8381737a4f5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -96,10 +96,9 @@ void StorageObjectStorageSink::finalize() void StorageObjectStorageSink::release() { writer.reset(); - write_buf->finalize(); + write_buf.reset(); } - PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 8e7155205c4..7c6d952d181 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -197,8 +197,22 @@ String StorageS3QueueSource::getName() const return name; } +void StorageS3QueueSource::lazyInitialize() +{ + if (initialized) + return; + + internal_source->lazyInitialize(processing_id); + reader = std::move(internal_source->reader); + if (reader) + reader_future = std::move(internal_source->reader_future); + initialized = true; +} + Chunk StorageS3QueueSource::generate() { + lazyInitialize(); + while (true) { if (!reader) diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 8c785e683c2..c1b45108b36 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -117,6 +117,7 @@ private: void applyActionAfterProcessing(const String & path); void appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); + void lazyInitialize(); }; } diff --git a/src/Storages/StorageS3Settings.cpp 
b/src/Storages/StorageS3Settings.cpp index 2780249e3fd..b767805f637 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -18,18 +18,20 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; } -S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings) +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings, bool validate_settings) { updateFromSettings(settings, false); - validate(); + if (validate_settings) + validate(); } S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix) - : PartUploadSettings(settings) + String setting_name_prefix, + bool validate_settings) + : PartUploadSettings(settings, validate_settings) { String key = config_prefix + "." + setting_name_prefix; strict_upload_part_size = config.getUInt64(key + "strict_upload_part_size", strict_upload_part_size); @@ -46,7 +48,8 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( storage_class_name = config.getString(config_prefix + ".s3_storage_class", storage_class_name); storage_class_name = Poco::toUpperInPlace(storage_class_name); - validate(); + if (validate_settings) + validate(); } S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedCollection & collection) @@ -170,8 +173,8 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() } -S3Settings::RequestSettings::RequestSettings(const Settings & settings) - : upload_settings(settings) +S3Settings::RequestSettings::RequestSettings(const Settings & settings, bool validate_settings) + : upload_settings(settings, validate_settings) { updateFromSettingsImpl(settings, false); } @@ -190,8 +193,9 @@ S3Settings::RequestSettings::RequestSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix) - : upload_settings(config, config_prefix, settings, setting_name_prefix) + String setting_name_prefix, + bool validate_settings) + : upload_settings(config, config_prefix, settings, setting_name_prefix, validate_settings) { String key = config_prefix + "." 
+ setting_name_prefix; max_single_read_retries = config.getUInt64(key + "max_single_read_retries", settings.s3_max_single_read_retries); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index e09be8654e7..c3bc8aa6ed6 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -44,13 +44,14 @@ struct S3Settings private: PartUploadSettings() = default; - explicit PartUploadSettings(const Settings & settings); + explicit PartUploadSettings(const Settings & settings, bool validate_settings = true); explicit PartUploadSettings(const NamedCollection & collection); PartUploadSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix = {}); + String setting_name_prefix = {}, + bool validate_settings = true); friend struct RequestSettings; }; @@ -78,7 +79,7 @@ struct S3Settings void setStorageClassName(const String & storage_class_name) { upload_settings.storage_class_name = storage_class_name; } RequestSettings() = default; - explicit RequestSettings(const Settings & settings); + explicit RequestSettings(const Settings & settings, bool validate_settings = true); explicit RequestSettings(const NamedCollection & collection); /// What's the setting_name_prefix, and why do we need it? @@ -92,7 +93,8 @@ struct S3Settings const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix = {}); + String setting_name_prefix = {}, + bool validate_settings = true); void updateFromSettingsIfChanged(const Settings & settings); From 4e1005bc43fabce6baf28f5f91b8a6db0315cc7d Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 17 Apr 2024 14:13:21 +0100 Subject: [PATCH 084/651] Fix s3 throttler --- src/Storages/ObjectStorage/S3/Configuration.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 46be0a01862..4c9e49d0705 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -79,7 +79,9 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, auto s3_settings = getSettings(config, config_prefix, context, false); /// FIXME: add a setting + request_settings.updateFromSettingsIfChanged(context->getSettingsRef()); auth_settings.updateFrom(s3_settings->auth_settings); + s3_settings->auth_settings = auth_settings; s3_settings->request_settings = request_settings; From 6bb3ad3133e3c7c767048bb32d85276bed726247 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 17 Apr 2024 13:15:07 +0000 Subject: [PATCH 085/651] Save the stacktrace of thread waiting on failing AsyncLoader job into exception --- src/Common/AsyncLoader.cpp | 3 ++- src/Common/ErrorCodes.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 3bec30893b9..29ea59b82ed 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int ASYNC_LOAD_CYCLE; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; + extern const int ASYNC_LOAD_WAIT_FAILED; extern const int LOGICAL_ERROR; } @@ -433,7 +434,7 @@ void AsyncLoader::wait(const LoadJobPtr & job, bool no_throw) std::unique_lock job_lock{job->mutex}; wait(job_lock, job); if (!no_throw && job->load_exception) - std::rethrow_exception(job->load_exception); + 
throw Exception(ErrorCodes::ASYNC_LOAD_WAIT_FAILED, "Waited job failed: {}", getExceptionMessage(job->load_exception, /* with_stacktrace = */ false)); } void AsyncLoader::remove(const LoadJobSet & jobs) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 97a339b2bac..9fad2f1ff02 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -598,6 +598,7 @@ M(717, EXPERIMENTAL_FEATURE_ERROR) \ M(718, TOO_SLOW_PARSING) \ M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \ + M(720, ASYNC_LOAD_WAIT_FAILED) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ From 51c8dd133888964b50c2fa3db5cf6069ccca0252 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 17 Apr 2024 16:17:57 +0100 Subject: [PATCH 086/651] Fix delta lake tests --- .../DataLakes/IStorageDataLake.h | 24 +++++++++++++++---- src/TableFunctions/ITableFunctionDataLake.h | 6 +++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 0e83bb70a2f..21ebc32c8ae 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -57,8 +57,8 @@ public: } return std::make_shared>( - base_configuration, std::move(metadata), configuration, object_storage, engine_name_, context, - table_id_, + base_configuration, std::move(metadata), configuration, object_storage, + engine_name_, context, table_id_, columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } @@ -68,11 +68,23 @@ public: static ColumnsDescription getTableStructureFromData( ObjectStoragePtr object_storage_, ConfigurationPtr base_configuration, - const std::optional &, + const std::optional & format_settings_, ContextPtr local_context) { auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); + + auto schema_from_metadata = metadata->getTableSchema(); + if (schema_from_metadata != NamesAndTypesList{}) + { + return ColumnsDescription(std::move(schema_from_metadata)); + } + else + { + ConfigurationPtr configuration = base_configuration->clone(); + configuration->getPaths() = metadata->getDataFiles(); + return Storage::getTableStructureFromData( + object_storage_, configuration, format_settings_, local_context); + } } void updateConfiguration(ContextPtr local_context) override @@ -102,6 +114,10 @@ public: , base_configuration(base_configuration_) , current_metadata(std::move(metadata_)) { + if (base_configuration->format == "auto") + { + base_configuration->format = Storage::configuration->format; + } } private: diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index c86970307c0..8cbd855bb96 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -57,8 +57,10 @@ protected: auto object_storage = TableFunction::getObjectStorage(context, !is_insert_query); return Storage::getTableStructureFromData(object_storage, configuration, std::nullopt, context); } - - return parseColumnsListFromString(configuration->structure, context); + else + { + return parseColumnsListFromString(configuration->structure, context); + } } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override
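
A note on the schema-resolution change in PATCH 086 above: getTableStructureFromData() now prefers the schema recorded in the lake metadata itself and only falls back to format-based inference over the referenced data files when the metadata carries no schema. A minimal sketch of that lookup order, using simplified stand-in types rather than the real ClickHouse classes:

    #include <string>
    #include <vector>

    /// Simplified stand-in for NamesAndTypesList: one "name type" entry per column.
    using Schema = std::vector<std::string>;

    /// Simplified stand-in for IDataLakeMetadata.
    struct LakeMetadata
    {
        Schema table_schema;                  /// schema recorded in the lake metadata (e.g. the Delta log)
        std::vector<std::string> data_files;  /// data files the metadata references
    };

    /// Hypothetical stand-in for Storage::getTableStructureFromData(): infer a schema
    /// by opening the data files themselves (e.g. reading Parquet footers).
    Schema inferSchemaFromFiles(const std::vector<std::string> & files)
    {
        /// Placeholder: the real code reads the files' own format metadata.
        return files.empty() ? Schema{} : Schema{"c1 Int64"};
    }

    Schema resolveSchema(const LakeMetadata & metadata)
    {
        /// Prefer the schema the metadata already knows...
        if (!metadata.table_schema.empty())
            return metadata.table_schema;
        /// ...and fall back to inference over the data files only when it has none.
        return inferSchemaFromFiles(metadata.data_files);
    }

The fallback matters for tables created without an explicit column list: the statement can succeed as long as either the lake metadata or the data files can supply a schema.
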
From c8915a16a51719e6ba569806b377f01859971e87 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 18 Apr 2024 17:22:51 +0100 Subject: [PATCH 087/651] Fix a few more tests --- src/Backups/BackupIO_AzureBlobStorage.cpp | 3 ++- .../registerBackupEngineAzureBlobStorage.cpp | 6 ++++-- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 5 ++++- src/Disks/ObjectStorages/S3/diskSettings.cpp | 5 ++--- src/Storages/ObjectStorage/DataLakes/Common.cpp | 2 +- .../ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 12 ++++++------ .../ObjectStorage/DataLakes/DeltaLakeMetadata.h | 6 ++++-- .../ObjectStorage/DataLakes/HudiMetadata.h | 4 +++- .../ObjectStorage/DataLakes/IStorageDataLake.h | 14 +++++++++++--- 9 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 4dd54712e5e..673930b5976 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -193,7 +193,8 @@ void BackupWriterAzureBlobStorage::copyDataToFile( { copyDataToAzureBlobStorageFile( create_read_buffer, start_pos, length, client, configuration.container, - path_in_backup, settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); + fs::path(configuration.blob_path) / path_in_backup, settings, + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 700c8cb222f..049a4b1a338 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -117,8 +117,10 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); auto path = configuration.getPath(); - configuration.setPath(removeFileNameFromURL(path)); - archive_params.archive_name = configuration.getPath(); + auto filename = removeFileNameFromURL(path); + configuration.setPath(path); + + archive_params.archive_name = filename; archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index f97d6f937ef..a2522212f90 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -60,7 +60,10 @@ void throwIfError(const Aws::Utils::Outcome & response) if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw S3Exception(fmt::format("{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())), err.GetErrorType()); + throw S3Exception( + fmt::format("{} (Code: {}, s3 exception: {})", + err.GetMessage(), static_cast(err.GetErrorType()), err.GetExceptionName()), + err.GetErrorType()); } } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 2bca7df7db9..66731e85d41 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -72,7 +72,6 @@ std::unique_ptr getClient( if (for_disk_s3) { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - url = S3::URI(endpoint); if (!url.key.ends_with('/')) url.key.push_back('/'); @@ -103,8 +102,8 @@ std::unique_ptr getClient(
client_configuration.endpointOverride = url.endpoint; client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); + client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + ".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value); + client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); diff --git a/src/Storages/ObjectStorage/DataLakes/Common.cpp b/src/Storages/ObjectStorage/DataLakes/Common.cpp index 5f0138078d4..0c9237127b9 100644 --- a/src/Storages/ObjectStorage/DataLakes/Common.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Common.cpp @@ -21,7 +21,7 @@ std::vector listFiles( if (filename.ends_with(suffix)) res.push_back(filename); } - LOG_TRACE(getLogger("DataLakeCommon"), "Listed {} files", res.size()); + LOG_TRACE(getLogger("DataLakeCommon"), "Listed {} files ({})", res.size(), fmt::join(res, ", ")); return res; } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 123c63439b0..d0f203b32bd 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -27,10 +27,11 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -struct DeltaLakeMetadata::Impl final : private WithContext +struct DeltaLakeMetadata::Impl { ObjectStoragePtr object_storage; ConfigurationPtr configuration; + ContextPtr context; /** * Useful links: @@ -39,9 +40,9 @@ struct DeltaLakeMetadata::Impl final : private WithContext Impl(ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) - : WithContext(context_) - , object_storage(object_storage_) + : object_storage(object_storage_) , configuration(configuration_) + , context(context_) { } @@ -137,7 +138,7 @@ struct DeltaLakeMetadata::Impl final : private WithContext */ void processMetadataFile(const String & key, std::set & result) { - auto read_settings = getContext()->getReadSettings(); + auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(key), read_settings); char c; @@ -190,7 +191,7 @@ struct DeltaLakeMetadata::Impl final : private WithContext return 0; String json_str; - auto read_settings = getContext()->getReadSettings(); + auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings); readJSONObjectPossiblyInvalid(json_str, *buf); @@ -252,7 +253,6 @@ struct DeltaLakeMetadata::Impl final : private WithContext LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); - auto context = getContext(); auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings); auto 
format_settings = getFormatSettings(context); diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index 1a5bb85586a..5050b88d809 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -9,7 +9,7 @@ namespace DB { -class DeltaLakeMetadata final : public IDataLakeMetadata, private WithContext +class DeltaLakeMetadata final : public IDataLakeMetadata { public: using ConfigurationPtr = StorageObjectStorageConfigurationPtr; @@ -28,7 +28,9 @@ public: bool operator ==(const IDataLakeMetadata & other) const override { const auto * deltalake_metadata = dynamic_cast(&other); - return deltalake_metadata && getDataFiles() == deltalake_metadata->getDataFiles(); + return deltalake_metadata + && !data_files.empty() && !deltalake_metadata->data_files.empty() + && data_files == deltalake_metadata->data_files; } static DataLakeMetadataPtr create( diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index ee8b1ea4978..6054c3f15d6 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -29,7 +29,9 @@ public: bool operator ==(const IDataLakeMetadata & other) const override { const auto * hudi_metadata = dynamic_cast(&other); - return hudi_metadata && getDataFiles() == hudi_metadata->getDataFiles(); + return hudi_metadata + && !data_files.empty() && !hudi_metadata->data_files.empty() + && data_files == hudi_metadata->data_files; } static DataLakeMetadataPtr create( diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 21ebc32c8ae..64228e880f8 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -42,17 +42,25 @@ public: auto object_storage = base_configuration->createObjectStorage(context); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; + + if (base_configuration->format == "auto") + base_configuration->format = "Parquet"; + ConfigurationPtr configuration = base_configuration->clone(); + try { metadata = DataLakeMetadata::create(object_storage, base_configuration, context); schema_from_metadata = metadata->getTableSchema(); - configuration->getPaths() = metadata->getDataFiles(); + configuration->setPaths(metadata->getDataFiles()); } catch (...) { if (mode <= LoadingStrictnessLevel::CREATE) throw; + + metadata.reset(); + configuration->setPaths({}); tryLogCurrentException(__PRETTY_FUNCTION__); } @@ -100,8 +108,8 @@ public: current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); - /// If metadata wasn't changed, we won't list data files again. 
- updated_configuration->getPaths() = current_metadata->getDataFiles(); + updated_configuration->setPaths(current_metadata->getDataFiles()); + Storage::configuration = updated_configuration; } From e2e6071063b4ce09530746c9ef49d12a36ccec37 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 19 Apr 2024 13:43:43 +0100 Subject: [PATCH 088/651] Fix a few more tests --- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 21 ++++ .../ObjectStorages/HDFS/HDFSObjectStorage.h | 8 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 +- .../ObjectStorage/AzureBlob/Configuration.cpp | 97 +++++++++++++++---- .../ObjectStorage/HDFS/Configuration.cpp | 10 +- .../ObjectStorage/StorageObjectStorage.cpp | 50 +++------- .../StorageObjectStorageQuerySettings.h | 2 +- .../StorageObjectStorageSink.cpp | 9 ++ .../ObjectStorage/StorageObjectStorageSink.h | 3 + src/Storages/ObjectStorage/Utils.cpp | 43 ++++++++ src/Storages/ObjectStorage/Utils.h | 17 ++++ tests/integration/test_storage_hdfs/test.py | 8 +- .../test_storage_kerberized_hdfs/test.py | 2 +- 13 files changed, 204 insertions(+), 69 deletions(-) create mode 100644 src/Storages/ObjectStorage/Utils.cpp create mode 100644 src/Storages/ObjectStorage/Utils.h diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 82c9a6c6c21..fc7d49324c7 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -31,8 +31,18 @@ void HDFSObjectStorage::startup() { } +void HDFSObjectStorage::initializeHDFS() const +{ + if (hdfs_fs) + return; + + hdfs_builder = createHDFSBuilder(url, config); + hdfs_fs = createHDFSFS(hdfs_builder.get()); +} + ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { + initializeHDFS(); /// what ever data_source_description.description value is, consider that key as relative key chassert(data_directory.starts_with("/")); return ObjectStorageKey::createAsRelative( @@ -41,6 +51,7 @@ ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & bool HDFSObjectStorage::exists(const StoredObject & object) const { + initializeHDFS(); std::string path = object.remote_path; if (path.starts_with(url_without_path)) path = path.substr(url_without_path.size()); @@ -57,6 +68,7 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLIN std::optional, std::optional) const { + initializeHDFS(); std::string path = object.remote_path; if (path.starts_with(url)) path = path.substr(url.size()); @@ -73,6 +85,7 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI std::optional, std::optional) const { + initializeHDFS(); auto disk_read_settings = patchSettings(read_settings); auto read_buffer_creator = [this, disk_read_settings] @@ -102,6 +115,7 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL size_t buf_size, const WriteSettings & write_settings) { + initializeHDFS(); if (attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, @@ -123,6 +137,7 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL /// Remove file. Throws exception if file doesn't exists or it's a directory. 
void HDFSObjectStorage::removeObject(const StoredObject & object) { + initializeHDFS(); auto path = object.remote_path; if (path.starts_with(url_without_path)) path = path.substr(url_without_path.size()); @@ -136,24 +151,28 @@ void HDFSObjectStorage::removeObject(const StoredObject & object) void HDFSObjectStorage::removeObjects(const StoredObjects & objects) { + initializeHDFS(); for (const auto & object : objects) removeObject(object); } void HDFSObjectStorage::removeObjectIfExists(const StoredObject & object) { + initializeHDFS(); if (exists(object)) removeObject(object); } void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { + initializeHDFS(); for (const auto & object : objects) removeObjectIfExists(object); } ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const { + initializeHDFS(); auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); if (!file_info) throw Exception(ErrorCodes::HDFS_ERROR, @@ -169,6 +188,7 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { + initializeHDFS(); auto * log = &Poco::Logger::get("HDFSObjectStorage"); LOG_TRACE(log, "Trying to list files for {}", path); @@ -222,6 +242,7 @@ void HDFSObjectStorage::copyObject( /// NOLINT const WriteSettings & write_settings, std::optional object_to_attributes) { + initializeHDFS(); if (object_to_attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 8987fa5eaf1..f57b7e1fda8 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -37,8 +37,6 @@ public: SettingsPtr settings_, const Poco::Util::AbstractConfiguration & config_) : config(config_) - , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) - , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) { const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2); @@ -117,10 +115,12 @@ public: bool isRemote() const override { return true; } private: + void initializeHDFS() const; + const Poco::Util::AbstractConfiguration & config; - HDFSBuilderWrapper hdfs_builder; - HDFSFSPtr hdfs_fs; + mutable HDFSBuilderWrapper hdfs_builder; + mutable HDFSFSPtr hdfs_fs; SettingsPtr settings; std::string url; std::string url_without_path; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 66731e85d41..49300a9cd89 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -157,7 +157,8 @@ std::unique_ptr getClient( auth_settings.server_side_encryption_customer_key_base64, std::move(sse_kms_config), auth_settings.headers, - credentials_configuration); + credentials_configuration, + auth_settings.session_token); } } diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 44ace9c3b65..4b826a0c721 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -381,7 +381,7 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte } void StorageAzureBlobConfiguration::addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const 
String & /* format */, ContextPtr context) + ASTs & args, const String & structure_, const String & format_, ContextPtr context) { if (tryGetNamedCollectionWithOverrides(args, context)) { @@ -397,66 +397,129 @@ void StorageAzureBlobConfiguration::addStructureAndFormatToArgs( { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage Azure requires 3 to 7 arguments: " - "StorageObjectStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + "StorageObjectStorage(connection_string|storage_account_url, container_name, " + "blobpath, [account_name, account_key, format, compression, structure])"); } + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + auto structure_literal = std::make_shared(structure_); + auto format_literal = std::make_shared(format_); auto is_format_arg = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + /// (connection_string, container_name, blobpath) if (args.size() == 3) { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); + /// Add compression = "auto" before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + /// (connection_string, container_name, blobpath, structure) or + /// (connection_string, container_name, blobpath, format) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. else if (args.size() == 4) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + /// (..., format) -> (..., format, compression, structure) if (is_format_arg(fourth_arg)) { + if (fourth_arg == "auto") + args[3] = format_literal; /// Add compression=auto before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + /// (..., structure) -> (..., format, compression, structure) else { - args.back() = structure_literal; + auto structure_arg = args.back(); + args[3] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); } } + /// (connection_string, container_name, blobpath, format, compression) or + /// (storage_account_url, container_name, blobpath, account_name, account_key) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. else if (args.size() == 5) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) + /// (..., format, compression) -> (..., format, compression, structure) + if (is_format_arg(fourth_arg)) { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args[3] = format_literal; + args.push_back(structure_literal); } - args.push_back(structure_literal); - } - else if (args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) + /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) + else { + args.push_back(format_literal); /// Add compression=auto before structure argument. 
args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + } + /// (connection_string, container_name, blobpath, format, compression, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, format) + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + auto sixth_arg = checkAndGetLiteralArgument(args[5], "format/structure"); + + /// (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[5], "structure") == "auto") + args[5] = structure_literal; + } + /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) + else if (is_format_arg(sixth_arg)) + { + if (sixth_arg == "auto") + args[5] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) else { - args.back() = structure_literal; + auto structure_arg = args.back(); + args[5] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + if (sixth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); } } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression) else if (args.size() == 7) { + /// (..., format, compression) -> (..., format, compression, structure) + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; args.push_back(structure_literal); } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) else if (args.size() == 8) { - args.back() = structure_literal; + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; + if (checkAndGetLiteralArgument(args[7], "structure") == "auto") + args[7] = structure_literal; } } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index af191070329..84f0a7bfe9f 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -73,9 +73,11 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit std::string url_str; url_str = checkAndGetLiteralArgument(args[0], "url"); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + if (args.size() > 1) { - args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); format = checkAndGetLiteralArgument(args[1], "format_name"); } @@ -83,18 +85,15 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit { if (args.size() > 2) { - args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); structure = checkAndGetLiteralArgument(args[2], "structure"); } if (args.size() > 3) { - args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context); compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); } } else if (args.size() > 2) { - args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); compression_method = 
checkAndGetLiteralArgument(args[2], "compression_method"); } @@ -165,6 +164,9 @@ void StorageHDFSConfiguration::addStructureAndFormatToArgs( auto format_literal = std::make_shared(format_); auto structure_literal = std::make_shared(structure_); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + /// hdfs(url) if (count == 1) { diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 8fc3de4de1b..13f3557d927 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -193,6 +194,7 @@ SinkToStoragePtr StorageObjectStorage::write( { updateConfiguration(local_context); const auto sample_block = metadata_snapshot->getSampleBlock(); + const auto & query_settings = StorageSettings::create(local_context->getSettingsRef()); if (configuration->withWildcard()) { @@ -209,7 +211,8 @@ SinkToStoragePtr StorageObjectStorage::write( { LOG_TEST(log, "Using PartitionedSink for {}", configuration->getPath()); return std::make_shared( - object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); + object_storage, configuration, query_settings, + format_settings, sample_block, local_context, partition_by_ast); } } @@ -220,46 +223,19 @@ SinkToStoragePtr StorageObjectStorage::write( getName(), configuration->getPath()); } - const auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); - - auto configuration_copy = configuration->clone(); - if (!storage_settings.truncate_on_insert - && object_storage->exists(StoredObject(configuration->getPath()))) + auto & paths = configuration->getPaths(); + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( + *object_storage, *configuration, query_settings, paths.front(), paths.size())) { - if (storage_settings.create_new_file_on_insert) - { - auto & paths = configuration_copy->getPaths(); - size_t index = paths.size(); - const auto & first_key = paths[0]; - auto pos = first_key.find_first_of('.'); - String new_key; - - do - { - new_key = first_key.substr(0, pos) - + "." - + std::to_string(index) - + (pos == std::string::npos ? "" : first_key.substr(pos)); - ++index; - } - while (object_storage->exists(StoredObject(new_key))); - - paths.push_back(new_key); - configuration->getPaths().push_back(new_key); - } - else - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. 
" - "If you want to overwrite it, enable setting [engine_name]_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting [engine_name]_create_new_file_on_insert", - configuration_copy->getNamespace(), configuration_copy->getPaths().back()); - } + paths.push_back(*new_key); } return std::make_shared( - object_storage, configuration_copy, format_settings, sample_block, local_context); + object_storage, + configuration->clone(), + format_settings, + sample_block, + local_context); } template diff --git a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h index f0687776aa7..606456011c3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h @@ -84,7 +84,7 @@ struct HDFSStorageSettings .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for hdfs + .skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 8381737a4f5..42371764920 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -102,6 +103,7 @@ void StorageObjectStorageSink::release() PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, + const StorageObjectStorageSettings & query_settings_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -109,6 +111,7 @@ PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( : PartitionedSink(partition_by, context_, sample_block_) , object_storage(object_storage_) , configuration(configuration_) + , query_settings(query_settings_) , format_settings(format_settings_) , sample_block(sample_block_) , context(context_) @@ -123,6 +126,12 @@ SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String auto partition_key = replaceWildcards(configuration->getPath(), partition_id); validateKey(partition_key); + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( + *object_storage, *configuration, query_settings, partition_key, /* sequence_number */1)) + { + partition_key = *new_key; + } + return std::make_shared( object_storage, configuration, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index a352e2c66a3..38805332a35 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include @@ -46,6 +47,7 @@ public: PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, + const StorageObjectStorageSettings & 
query_settings_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -59,6 +61,7 @@ private: ObjectStoragePtr object_storage; StorageObjectStorageConfigurationPtr configuration; + const StorageObjectStorageSettings query_settings; const std::optional format_settings; const Block sample_block; const ContextPtr context; diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp new file mode 100644 index 00000000000..6cc3962209f --- /dev/null +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include + + +namespace DB +{ + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const IObjectStorage & object_storage, + const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorageSettings & query_settings, + const String & key, + size_t sequence_number) +{ + if (query_settings.truncate_on_insert + || !object_storage.exists(StoredObject(key))) + return std::nullopt; + + if (query_settings.create_new_file_on_insert) + { + auto pos = key.find_first_of('.'); + String new_key; + do + { + new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); + ++sequence_number; + } + while (object_storage.exists(StoredObject(new_key))); + + return new_key; + } + + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. " + "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", + configuration.getNamespace(), key); +} + +} diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h new file mode 100644 index 00000000000..9291bb72615 --- /dev/null +++ b/src/Storages/ObjectStorage/Utils.h @@ -0,0 +1,17 @@ +#include + +namespace DB +{ + +class IObjectStorage; +class StorageObjectStorageConfiguration; +struct StorageObjectStorageSettings; + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const IObjectStorage & object_storage, + const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorageSettings & query_settings, + const std::string & key, + size_t sequence_number); + +} diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index d8dab85ee6a..dc375b9ec36 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -980,7 +980,7 @@ def test_read_subcolumns(started_cluster): assert ( res - == "2\thdfs://hdfs1:9000/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + == "2\t/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" ) res = node.query( @@ -989,7 +989,7 @@ def test_read_subcolumns(started_cluster): assert ( res - == "2\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + == "2\t/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" ) res = node.query( @@ -998,7 +998,7 @@ def test_read_subcolumns(started_cluster): assert ( res - == "0\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + == "0\t/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" ) res = node.query( @@ -1007,7 +1007,7 @@ def test_read_subcolumns(started_cluster): assert ( res - == "42\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + == "42\t/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" ) diff --git 
a/tests/integration/test_storage_kerberized_hdfs/test.py b/tests/integration/test_storage_kerberized_hdfs/test.py index c72152fa376..ddfc1f6483d 100644 --- a/tests/integration/test_storage_kerberized_hdfs/test.py +++ b/tests/integration/test_storage_kerberized_hdfs/test.py @@ -130,7 +130,7 @@ def test_prohibited(started_cluster): assert False, "Exception have to be thrown" except Exception as ex: assert ( - "Unable to open HDFS file: /storage_user_two_prohibited error: Permission denied: user=specuser, access=WRITE" + "Unable to open HDFS file: /storage_user_two_prohibited (hdfs://suser@kerberizedhdfs1:9010/storage_user_two_prohibited) error: Permission denied: user=specuser, access=WRITE" in str(ex) ) From 191937c0c6c5e5a31c6045269026ca1a1e5171c7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 20 Apr 2024 10:19:55 +0100 Subject: [PATCH 089/651] Fix style check --- tests/integration/test_storage_hdfs/test.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index dc375b9ec36..820e3db6eb1 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -978,37 +978,25 @@ def test_read_subcolumns(started_cluster): f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "2\t/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" - ) + assert res == "2\t/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" res = node.query( f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "2\t/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" - ) + assert res == "2\t/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "0\t/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" - ) + assert res == "0\t/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" ) - assert ( - res - == "42\t/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" - ) + assert res == "42\t/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" def test_union_schema_inference_mode(started_cluster):
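
The checkAndGetNewFileOnInsertIfNeeded() helper that PATCH 090 below touches (it was introduced as src/Storages/ObjectStorage/Utils.cpp in PATCH 088 above) implements the create_new_file_on_insert behaviour: when the target key already exists, a numeric suffix is spliced in before the first dot and incremented until an unused key is found. A compact sketch of just that key-generation loop; the std::set here is a hypothetical stand-in for the object storage's exists() check, and the starting sequence number is simplified:

    #include <iostream>
    #include <set>
    #include <string>

    /// "data.csv" -> "data.1.csv" -> "data.2.csv" ... until the key is unused.
    std::string nextFreeKey(const std::set<std::string> & existing, const std::string & key)
    {
        const auto pos = key.find_first_of('.');
        size_t sequence_number = 1;
        std::string new_key;
        do
        {
            new_key = key.substr(0, pos) + "." + std::to_string(sequence_number)
                + (pos == std::string::npos ? "" : key.substr(pos));
            ++sequence_number;
        }
        while (existing.contains(new_key));
        return new_key;
    }

    int main()
    {
        std::set<std::string> bucket{"data.csv", "data.1.csv"};
        std::cout << nextFreeKey(bucket, "data.csv") << '\n';  /// prints "data.2.csv"
    }

Note that the suffix goes before the first dot, so a key like data.csv.gz becomes data.1.csv.gz, keeping the format and compression extensions intact.
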
From c7f0cfc4c2df850cf97c81febd61b3411c4e7869 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 20 Apr 2024 11:47:41 +0100 Subject: [PATCH 090/651] Fix style check --- src/Storages/ObjectStorage/Utils.cpp | 5 +++++ src/Storages/ObjectStorage/Utils.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index 6cc3962209f..9caab709081 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -7,6 +7,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, const StorageObjectStorageConfiguration & configuration, const StorageObjectStorageSettings & query_settings, diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 9291bb72615..afc0f31a33f 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -1,3 +1,4 @@ +#pragma once #include namespace DB From a4daf2b454c44e1891a61eaddf3a2fd965e5f880 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 20 Apr 2024 14:46:32 +0100 Subject: [PATCH 091/651] Fix hdfs race --- src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp | 7 ++++++- src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index fc7d49324c7..ed63795cb05 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -33,11 +33,16 @@ void HDFSObjectStorage::startup() void HDFSObjectStorage::initializeHDFS() const { - if (hdfs_fs) + if (initialized) + return; + + std::lock_guard lock(init_mutex); + if (initialized) return; hdfs_builder = createHDFSBuilder(url, config); hdfs_fs = createHDFSFS(hdfs_builder.get()); + initialized = true; } ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index f57b7e1fda8..b626d3dc779 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -121,6 +121,10 @@ private: mutable HDFSBuilderWrapper hdfs_builder; mutable HDFSFSPtr hdfs_fs; + + mutable std::mutex init_mutex; + mutable std::atomic_bool initialized{false}; + SettingsPtr settings; std::string url; std::string url_without_path;
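
The race fixed in PATCH 091 above is the classic lazy-initialization problem (the lock-free initializeHDFS() introduced in PATCH 088 could be entered by several threads at once), and the fix is textbook double-checked locking: a cheap atomic check keeps the hot path lock-free after initialization, while the mutex plus a second check under it guarantee that exactly one thread builds the HDFS handles. A generic sketch of the same pattern, with a hypothetical Connection type standing in for the HDFS builder and filesystem wrappers:

    #include <atomic>
    #include <memory>
    #include <mutex>

    struct Connection
    {
        Connection() { /* expensive setup, e.g. connecting to a remote filesystem */ }
    };

    class LazyConnection
    {
    public:
        Connection & get() const
        {
            if (initialized)                      /// fast path: lock-free once set up
                return *connection;

            std::lock_guard lock(init_mutex);
            if (initialized)                      /// re-check: another thread may have initialized meanwhile
                return *connection;

            connection = std::make_unique<Connection>();
            initialized = true;                   /// published only after construction finished
            return *connection;
        }

    private:
        mutable std::mutex init_mutex;
        mutable std::atomic_bool initialized{false};
        mutable std::unique_ptr<Connection> connection;
    };

Checking a dedicated std::atomic_bool rather than the hdfs_fs pointer itself is what removes the data race: the plain pointer read in the earlier version was unsynchronized and could observe a half-initialized object.
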
From 399414bb40e517b315ab396669875af8e365ece0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 20 Apr 2024 17:27:54 +0100 Subject: [PATCH 092/651] Better --- src/Common/CurrentMetrics.cpp | 3 + src/Interpreters/InterpreterSystemQuery.cpp | 9 +- .../ObjectStorage/AzureBlob/Configuration.cpp | 15 ++ .../ObjectStorage/AzureBlob/Configuration.h | 11 +- .../DataLakes/DeltaLakeMetadata.cpp | 2 +- .../DataLakes/IStorageDataLake.h | 17 +- .../DataLakes/registerDataLakeStorages.cpp | 7 +- .../ObjectStorage/HDFS/Configuration.cpp | 14 ++ .../ObjectStorage/HDFS/Configuration.h | 11 +- .../ObjectStorage/ReadBufferIterator.cpp | 4 +- .../ObjectStorage/ReadBufferIterator.h | 4 +- ...rage.cpp => ReadFromObjectStorageStep.cpp} | 32 +--- ...tStorage.h => ReadFromObjectStorageStep.h} | 18 +- .../ObjectStorage/S3/Configuration.cpp | 15 ++ src/Storages/ObjectStorage/S3/Configuration.h | 11 +- .../ObjectStorage/StorageObjectStorage.cpp | 181 +++++++----------- .../ObjectStorage/StorageObjectStorage.h | 69 +++---- .../StorageObjectStorageCluster.cpp | 89 ++++----- .../StorageObjectStorageCluster.h | 28 +-- .../StorageObjectStorageConfiguration.h | 7 + .../StorageObjectStorageQuerySettings.h | 102 ---------- .../StorageObjectStorageSink.cpp | 3 +- .../ObjectStorage/StorageObjectStorageSink.h | 4 +- .../StorageObjectStorageSource.cpp | 49 ++--- .../StorageObjectStorageSource.h | 26 +-- src/Storages/ObjectStorage/Utils.cpp | 42 +++- src/Storages/ObjectStorage/Utils.h | 14 +- .../registerStorageObjectStorage.cpp | 20 +- src/Storages/S3Queue/S3QueueSource.h | 3 +- src/Storages/S3Queue/StorageS3Queue.cpp | 25 +-- src/Storages/S3Queue/StorageS3Queue.h | 3 +- .../StorageSystemSchemaInferenceCache.cpp | 9 +- src/TableFunctions/ITableFunctionDataLake.h | 2 +- .../TableFunctionObjectStorage.cpp | 78 ++++---- .../TableFunctionObjectStorage.h | 8 +- .../TableFunctionObjectStorageCluster.cpp | 16 +- .../TableFunctionObjectStorageCluster.h | 12 +- 37 files changed, 427 insertions(+), 536 deletions(-) rename src/Storages/ObjectStorage/{ReadFromStorageObjectStorage.cpp => ReadFromObjectStorageStep.cpp} (62%) rename src/Storages/ObjectStorage/{ReadFromStorageObjectStorage.h => ReadFromObjectStorageStep.h} (70%) delete mode 100644 src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 0f25397a961..983e737991c 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -168,6 +168,9 @@ M(ObjectStorageS3Threads, "Number of threads in the S3ObjectStorage thread pool.") \ M(ObjectStorageS3ThreadsActive, "Number of threads in the S3ObjectStorage thread pool running a task.") \ M(ObjectStorageS3ThreadsScheduled, "Number of queued or active jobs in the S3ObjectStorage thread pool.") \ + M(StorageObjectStorageThreads, "Number of threads in the remote table engines thread pools.") \ + M(StorageObjectStorageThreadsActive, "Number of threads in the remote table engines thread pool running a task.") \ + M(StorageObjectStorageThreadsScheduled, "Number of queued or active jobs in remote table engines thread pool.") \ M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \ M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \ M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \ diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 27b2a9460b7..af9dc08e8c7 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -53,6 +53,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -489,17 +492,17 @@ BlockIO InterpreterSystemQuery::execute() StorageFile::getSchemaCache(getContext()).clear(); #if USE_AWS_S3 if (caches_to_drop.contains("S3")) - StorageS3::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageS3Configuration::type_name).clear(); #endif #if USE_HDFS if (caches_to_drop.contains("HDFS")) - StorageHDFS::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageHDFSConfiguration::type_name).clear(); #endif if (caches_to_drop.contains("URL")) StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageAzureBlob::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageAzureBlobConfiguration::type_name).clear(); #endif break; } diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 4b826a0c721..c9bc59d62aa 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -101,6 +101,21 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co return settings_ptr; } +StorageObjectStorage::QuerySettings StorageAzureBlobConfiguration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.azure_truncate_on_insert, 
.create_new_file_on_insert = settings.azure_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure + .list_object_keys_size = settings.azure_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, + }; +} + ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h index c12ff81197d..7e105ea82b5 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -18,9 +18,15 @@ class StorageAzureBlobConfiguration : public StorageObjectStorageConfiguration friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); public: + static constexpr auto type_name = "azure"; + static constexpr auto engine_name = "Azure"; + StorageAzureBlobConfiguration() = default; StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other); + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return engine_name; } + Path getPath() const override { return blob_path; } void setPath(const Path & path) override { blob_path = path; } @@ -30,6 +36,7 @@ public: String getDataSourceDescription() override { return fs::path(connection_url) / container; } String getNamespace() const override { return container; } + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT @@ -37,8 +44,8 @@ public: void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; - static void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context); + void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; protected: using AzureClient = Azure::Storage::Blobs::BlobContainerClient; diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index d0f203b32bd..c6590ba8d43 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -184,7 +184,7 @@ struct DeltaLakeMetadata::Impl * * We need to get "version", which is the version of the checkpoint we need to read. 
*/ - size_t readLastCheckpointIfExists() + size_t readLastCheckpointIfExists() const { const auto last_checkpoint_file = fs::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; if (!object_storage->exists(StoredObject(last_checkpoint_file))) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 64228e880f8..e1851775925 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -21,17 +21,16 @@ namespace DB /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Right now it's implemented on top of StorageS3 and right now it doesn't support /// many Iceberg features like schema evolution, partitioning, positional and equality deletes. -template -class IStorageDataLake final : public StorageObjectStorage +template +class IStorageDataLake final : public StorageObjectStorage { public: - using Storage = StorageObjectStorage; + using Storage = StorageObjectStorage; using ConfigurationPtr = Storage::ConfigurationPtr; static StoragePtr create( ConfigurationPtr base_configuration, ContextPtr context, - const String & engine_name_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -64,9 +63,9 @@ public: tryLogCurrentException(__PRETTY_FUNCTION__); } - return std::make_shared>( + return std::make_shared>( base_configuration, std::move(metadata), configuration, object_storage, - engine_name_, context, table_id_, + context, table_id_, columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } @@ -133,9 +132,9 @@ private: DataLakeMetadataPtr current_metadata; }; -using StorageIceberg = IStorageDataLake; -using StorageDeltaLake = IStorageDataLake; -using StorageHudi = IStorageDataLake; +using StorageIceberg = IStorageDataLake; +using StorageDeltaLake = IStorageDataLake; +using StorageHudi = IStorageDataLake; } diff --git a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp index d11dd1ca836..a5170e5ed6b 100644 --- a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp +++ b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB @@ -24,7 +23,7 @@ void registerStorageIceberg(StorageFactory & factory) StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageIceberg::create( - configuration, args.getContext(), "Iceberg", args.table_id, args.columns, + configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode); }, { @@ -47,7 +46,7 @@ void registerStorageDeltaLake(StorageFactory & factory) StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageDeltaLake::create( - configuration, args.getContext(), "DeltaLake", args.table_id, args.columns, + configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode); }, { @@ -68,7 +67,7 @@ void registerStorageHudi(StorageFactory & factory) StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageHudi::create( - 
configuration, args.getContext(), "Hudi", args.table_id, args.columns, + configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode); }, { diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 84f0a7bfe9f..0062ac969ac 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -60,6 +60,20 @@ std::string StorageHDFSConfiguration::getPathWithoutGlob() const return "/"; return path.substr(0, end_of_path_without_globs); } +StorageObjectStorage::QuerySettings StorageHDFSConfiguration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.hdfs_truncate_on_insert, + .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs + .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, + }; +} void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) { diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 23a7e8e4549..0a502857153 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -13,9 +13,15 @@ namespace DB class StorageHDFSConfiguration : public StorageObjectStorageConfiguration { public: + static constexpr auto type_name = "hdfs"; + static constexpr auto engine_name = "HDFS"; + StorageHDFSConfiguration() = default; StorageHDFSConfiguration(const StorageHDFSConfiguration & other); + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return engine_name; } + Path getPath() const override { return path; } void setPath(const Path & path_) override { path = path_; } @@ -25,13 +31,14 @@ public: String getNamespace() const override { return ""; } String getDataSourceDescription() override { return url; } + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } - static void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context); + void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; std::string getPathWithoutGlob() const override; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 0b6e34fb831..f8ce90a2b1f 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -19,7 +18,6 @@ ReadBufferIterator::ReadBufferIterator( ConfigurationPtr configuration_, const FileIterator & 
file_iterator_, const std::optional & format_settings_, - const StorageObjectStorageSettings & query_settings_, SchemaCache & schema_cache_, ObjectInfos & read_keys_, const ContextPtr & context_) @@ -28,7 +26,7 @@ ReadBufferIterator::ReadBufferIterator( , configuration(configuration_) , file_iterator(file_iterator_) , format_settings(format_settings_) - , query_settings(query_settings_) + , query_settings(configuration->getQuerySettings(context_)) , schema_cache(schema_cache_) , read_keys(read_keys_) , format(configuration->format == "auto" ? std::nullopt : std::optional(configuration->format)) diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 053bcbf894f..2d58e1c789e 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -2,7 +2,6 @@ #include #include #include -#include #include @@ -19,7 +18,6 @@ public: ConfigurationPtr configuration_, const FileIterator & file_iterator_, const std::optional & format_settings_, - const StorageObjectStorageSettings & query_settings_, SchemaCache & schema_cache_, ObjectInfos & read_keys_, const ContextPtr & context_); @@ -50,7 +48,7 @@ private: const ConfigurationPtr configuration; const FileIterator file_iterator; const std::optional & format_settings; - const StorageObjectStorageSettings query_settings; + const StorageObjectStorage::QuerySettings query_settings; SchemaCache & schema_cache; ObjectInfos & read_keys; std::optional format; diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp similarity index 62% rename from src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp rename to src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp index 89d33191f41..f19e01cdc3e 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp @@ -1,11 +1,11 @@ -#include +#include #include #include namespace DB { -ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( +ReadFromObjectStorageStep::ReadFromObjectStorageStep( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, const String & name_, @@ -14,49 +14,41 @@ ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, const std::optional & format_settings_, - const StorageObjectStorageSettings & query_settings_, bool distributed_processing_, ReadFromFormatInfo info_, SchemaCache & schema_cache_, const bool need_only_count_, ContextPtr context_, size_t max_block_size_, - size_t num_streams_, - CurrentMetrics::Metric metric_threads_count_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_) + size_t num_streams_) : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) , info(std::move(info_)) , virtual_columns(virtual_columns_) , format_settings(format_settings_) - , query_settings(query_settings_) + , query_settings(configuration->getQuerySettings(context_)) , schema_cache(schema_cache_) , name(name_ + "Source") , need_only_count(need_only_count_) , max_block_size(max_block_size_) , num_streams(num_streams_) , distributed_processing(distributed_processing_) - , metric_threads_count(metric_threads_count_) - , 
metric_threads_active(metric_threads_active_) - , metric_threads_scheduled(metric_threads_scheduled_) { } -void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predicate) +void ReadFromObjectStorageStep::createIterator(const ActionsDAG::Node * predicate) { if (!iterator_wrapper) { auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, query_settings, distributed_processing, - context, predicate, virtual_columns, nullptr, metric_threads_count, - metric_threads_active, metric_threads_scheduled, context->getFileProgressCallback()); + configuration, object_storage, distributed_processing, + context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); } } -void ReadFromStorageObejctStorage::applyFilters(ActionDAGNodes added_filter_nodes) +void ReadFromObjectStorageStep::applyFilters(ActionDAGNodes added_filter_nodes) { filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; @@ -66,7 +58,7 @@ void ReadFromStorageObejctStorage::applyFilters(ActionDAGNodes added_filter_node createIterator(predicate); } -void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +void ReadFromObjectStorageStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { createIterator(nullptr); auto context = getContext(); @@ -74,13 +66,9 @@ void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pip Pipes pipes; for (size_t i = 0; i < num_streams; ++i) { - auto threadpool = std::make_shared( - metric_threads_count, metric_threads_active, metric_threads_scheduled, /* max_threads */1); - auto source = std::make_shared( getName(), object_storage, configuration, info, format_settings, query_settings, - context, max_block_size, iterator_wrapper, need_only_count, schema_cache, - std::move(threadpool), metric_threads_count, metric_threads_active, metric_threads_scheduled); + context, max_block_size, iterator_wrapper, need_only_count, schema_cache); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h similarity index 70% rename from src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h rename to src/Storages/ObjectStorage/ReadFromObjectStorageStep.h index c0dd02d75f8..d98ebfef1f2 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h +++ b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h @@ -1,17 +1,16 @@ #pragma once -#include -#include #include +#include namespace DB { -class ReadFromStorageObejctStorage : public SourceStepWithFilter +class ReadFromObjectStorageStep : public SourceStepWithFilter { public: using ConfigurationPtr = StorageObjectStorageConfigurationPtr; - ReadFromStorageObejctStorage( + ReadFromObjectStorageStep( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, const String & name_, @@ -20,17 +19,13 @@ public: const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, const std::optional & format_settings_, - const StorageObjectStorageSettings & query_settings_, bool distributed_processing_, ReadFromFormatInfo info_, SchemaCache & schema_cache_, bool need_only_count_, ContextPtr context_, size_t max_block_size_, - size_t num_streams_, - CurrentMetrics::Metric 
metric_threads_count_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_); + size_t num_streams_); std::string getName() const override { return name; } @@ -46,16 +41,13 @@ private: const ReadFromFormatInfo info; const NamesAndTypesList virtual_columns; const std::optional format_settings; - const StorageObjectStorageSettings query_settings; + const StorageObjectStorage::QuerySettings query_settings; SchemaCache & schema_cache; const String name; const bool need_only_count; const size_t max_block_size; const size_t num_streams; const bool distributed_processing; - const CurrentMetrics::Metric metric_threads_count; - const CurrentMetrics::Metric metric_threads_active; - const CurrentMetrics::Metric metric_threads_scheduled; void createIterator(const ActionsDAG::Node * predicate); }; diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 4c9e49d0705..139d9004f8e 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -70,6 +70,21 @@ StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & ot keys = other.keys; } +StorageObjectStorage::QuerySettings StorageS3Configuration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.s3_truncate_on_insert, + .create_new_file_on_insert = settings.s3_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, + .list_object_keys_size = settings.s3_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist, + }; +} + ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT { assertInitialized(); diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index ff5e8680e66..de4a6d17579 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -14,9 +15,14 @@ namespace DB class StorageS3Configuration : public StorageObjectStorageConfiguration { public: + static constexpr auto type_name = "s3"; + StorageS3Configuration() = default; StorageS3Configuration(const StorageS3Configuration & other); + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return url.storage_name; } + Path getPath() const override { return url.key; } void setPath(const Path & path) override { url.key = path; } @@ -26,6 +32,7 @@ public: String getNamespace() const override { return url.bucket; } String getDataSourceDescription() override; + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; void validateNamespace(const String & name) const override; @@ -34,8 +41,8 @@ public: bool isStaticConfiguration() const override { return static_configuration; } ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT - static void addStructureAndFormatToArgs( - ASTs & args, const String & structure, const String & 
format, ContextPtr context); + void addStructureAndFormatToArgs( + ASTs & args, const String & structure, const String & format, ContextPtr context) override; private: void fromNamedCollection(const NamedCollection & collection) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 13f3557d927..441639629a3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -11,10 +11,9 @@ #include #include #include -#include #include #include -#include +#include #include #include #include @@ -25,53 +24,13 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int DATABASE_ACCESS_DENIED; extern const int NOT_IMPLEMENTED; } -template -std::unique_ptr getStorageMetadata( - ObjectStoragePtr object_storage, - const StorageObjectStorageConfigurationPtr & configuration, - const ColumnsDescription & columns, - const ConstraintsDescription & constraints, - std::optional format_settings, - const String & comment, - const std::string & engine_name, - const ContextPtr & context) -{ - using Storage = StorageObjectStorage; - - auto storage_metadata = std::make_unique(); - if (columns.empty()) - { - auto fetched_columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context); - storage_metadata->setColumns(fetched_columns); - } - else if (!columns.hasOnlyOrdinary()) - { - /// We don't allow special columns. - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine {} doesn't support special columns " - "like MATERIALIZED, ALIAS or EPHEMERAL", engine_name); - } - else - { - if (configuration->format == "auto") - Storage::setFormatFromData(object_storage, configuration, format_settings, context); - - storage_metadata->setColumns(columns); - } - storage_metadata->setConstraints(constraints); - storage_metadata->setComment(comment); - return storage_metadata; -} - -template -StorageObjectStorage::StorageObjectStorage( +StorageObjectStorage::StorageObjectStorage( ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, - const String & engine_name_, ContextPtr context, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -80,16 +39,13 @@ StorageObjectStorage::StorageObjectStorage( std::optional format_settings_, bool distributed_processing_, ASTPtr partition_by_) - : IStorage(table_id_, getStorageMetadata( - object_storage_, configuration_, columns_, constraints_, format_settings_, - comment, engine_name, context)) - , engine_name(engine_name_) + : IStorage(table_id_) + , configuration(configuration_) + , object_storage(object_storage_) , format_settings(format_settings_) , partition_by(partition_by_) , distributed_processing(distributed_processing_) - , log(getLogger("Storage" + engine_name_)) - , object_storage(object_storage_) - , configuration(configuration_) + , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName()))) { FormatFactory::instance().checkFormatName(configuration->format); configuration->check(context); @@ -98,46 +54,41 @@ StorageObjectStorage::StorageObjectStorage( for (const auto & key : configuration->getPaths()) objects.emplace_back(key); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(getInMemoryMetadataPtr()->getColumns())); + auto metadata = getStorageMetadata( + object_storage_, configuration_, columns_, + constraints_, format_settings_, comment, context); + + 
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setInMemoryMetadata(std::move(metadata)); } -template -bool StorageObjectStorage::prefersLargeBlocks() const +String StorageObjectStorage::getName() const +{ + return configuration->getEngineName(); +} + +bool StorageObjectStorage::prefersLargeBlocks() const { return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration->format); } -template -bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const +bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const { return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration->format, context); } -template -bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const +bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context, format_settings); } -template -void StorageObjectStorage::updateConfiguration(ContextPtr context) +void StorageObjectStorage::updateConfiguration(ContextPtr context) { if (!configuration->isStaticConfiguration()) object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); } -template -SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) -{ - static SchemaCache schema_cache( - context->getConfigRef().getUInt( - StorageSettings::SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING, - DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -template -void StorageObjectStorage::read( +void StorageObjectStorage::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -155,13 +106,12 @@ void StorageObjectStorage::read( getName()); } - const auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + const auto read_from_format_info = prepareReadingFromFormat( + column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII SOURCE HEADER: {}", read_from_format_info.source_header.dumpStructure()); - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII FORMAT HEADER: {}", read_from_format_info.format_header.dumpStructure()); - auto read_step = std::make_unique( + auto read_step = std::make_unique( object_storage, configuration, getName(), @@ -170,23 +120,18 @@ void StorageObjectStorage::read( query_info, storage_snapshot, format_settings, - StorageSettings::create(local_context->getSettingsRef()), distributed_processing, std::move(read_from_format_info), getSchemaCache(local_context), need_only_count, local_context, max_block_size, - num_streams, - StorageSettings::ObjectStorageThreads(), - StorageSettings::ObjectStorageThreadsActive(), - StorageSettings::ObjectStorageThreadsScheduled()); + num_streams); query_plan.addStep(std::move(read_step)); } -template -SinkToStoragePtr StorageObjectStorage::write( +SinkToStoragePtr StorageObjectStorage::write( const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, @@ -194,7 +139,7 @@ SinkToStoragePtr StorageObjectStorage::write( { updateConfiguration(local_context); const auto sample_block = metadata_snapshot->getSampleBlock(); - const auto & 
query_settings = StorageSettings::create(local_context->getSettingsRef()); + const auto & settings = configuration->getQuerySettings(local_context); if (configuration->withWildcard()) { @@ -209,23 +154,22 @@ SinkToStoragePtr StorageObjectStorage::write( if (partition_by_ast) { - LOG_TEST(log, "Using PartitionedSink for {}", configuration->getPath()); return std::make_shared( - object_storage, configuration, query_settings, - format_settings, sample_block, local_context, partition_by_ast); + object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); } } if (configuration->withGlobs()) { - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "{} key '{}' contains globs, so the table is in readonly mode", - getName(), configuration->getPath()); + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "{} key '{}' contains globs, so the table is in readonly mode", + getName(), configuration->getPath()); } auto & paths = configuration->getPaths(); if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( - *object_storage, *configuration, query_settings, paths.front(), paths.size())) + *object_storage, *configuration, settings, paths.front(), paths.size())) { paths.push_back(*new_key); } @@ -238,9 +182,11 @@ SinkToStoragePtr StorageObjectStorage::write( local_context); } -template -void StorageObjectStorage::truncate( - const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) +void StorageObjectStorage::truncate( + const ASTPtr &, + const StorageMetadataPtr &, + ContextPtr, + TableExclusiveLockHolder &) { if (configuration->withGlobs()) { @@ -257,34 +203,37 @@ void StorageObjectStorage::truncate( object_storage->removeObjectsIfExist(objects); } -template -std::unique_ptr StorageObjectStorage::createReadBufferIterator( +std::unique_ptr StorageObjectStorage::createReadBufferIterator( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, ObjectInfos & read_keys, const ContextPtr & context) { - const auto settings = StorageSettings::create(context->getSettingsRef()); auto file_iterator = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, settings, /* distributed_processing */false, - context, /* predicate */{}, /* virtual_columns */{}, &read_keys, - StorageSettings::ObjectStorageThreads(), StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); + configuration, + object_storage, + false/* distributed_processing */, + context, + {}/* predicate */, + {}/* virtual_columns */, + &read_keys); return std::make_unique( object_storage, configuration, file_iterator, - format_settings, StorageSettings::create(context->getSettingsRef()), getSchemaCache(context), read_keys, context); + format_settings, getSchemaCache(context, configuration->getTypeName()), read_keys, context); } -template -ColumnsDescription StorageObjectStorage::getTableStructureFromData( +ColumnsDescription StorageObjectStorage::getTableStructureFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, const ContextPtr & context) { ObjectInfos read_keys; - auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto read_buffer_iterator = createReadBufferIterator( + object_storage, configuration, format_settings, read_keys, context); + if (configuration->format == "auto") { auto [columns, format] = 
detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); @@ -297,20 +246,34 @@ ColumnsDescription StorageObjectStorage::getTableStructureFromD } } -template -void StorageObjectStorage::setFormatFromData( +void StorageObjectStorage::setFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, const ContextPtr & context) { ObjectInfos read_keys; - auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto read_buffer_iterator = createReadBufferIterator( + object_storage, configuration, format_settings, read_keys, context); configuration->format = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context).second; } -template class StorageObjectStorage; -template class StorageObjectStorage; -template class StorageObjectStorage; +SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) +{ + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_" + configuration->getTypeName(), + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; +} + +SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) +{ + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_" + storage_type_name, + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; +} } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index a2112f7ed01..3dbe010e406 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -1,31 +1,22 @@ #pragma once - -#include #include #include -#include #include #include #include - namespace DB { -struct SelectQueryInfo; class StorageObjectStorageConfiguration; -struct S3StorageSettings; -struct HDFSStorageSettings; -struct AzureStorageSettings; -class PullingPipelineExecutor; -using ReadTaskCallback = std::function; -class IOutputFormat; -class IInputFormat; -class SchemaCache; class ReadBufferIterator; +class SchemaCache; - -template +/** + * A general class containing implementation for external table engines + * such as StorageS3, StorageAzure, StorageHDFS. + * Works with an object of IObjectStorage class. 
+ */ class StorageObjectStorage : public IStorage { public: @@ -35,10 +26,26 @@ public: using ObjectInfoPtr = std::shared_ptr; using ObjectInfos = std::vector; + struct QuerySettings + { + /// Insert settings: + bool truncate_on_insert; + bool create_new_file_on_insert; + + /// Schema inference settings: + bool schema_inference_use_cache; + SchemaInferenceMode schema_inference_mode; + + /// List settings: + bool skip_empty_files; + size_t list_object_keys_size; + bool throw_on_zero_files_match; + bool ignore_non_existent_file; + }; + StorageObjectStorage( ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, - const String & engine_name_, ContextPtr context_, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -48,17 +55,17 @@ public: bool distributed_processing_ = false, ASTPtr partition_by_ = nullptr); - String getName() const override { return engine_name; } + String getName() const override; void read( QueryPlan & query_plan, - const Names &, - const StorageSnapshotPtr &, - SelectQueryInfo &, - ContextPtr, - QueryProcessingStage::Enum, - size_t, - size_t) override; + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, @@ -84,7 +91,9 @@ public: bool parallelizeOutputAfterReading(ContextPtr context) const override; - static SchemaCache & getSchemaCache(const ContextPtr & context); + SchemaCache & getSchemaCache(const ContextPtr & context); + + static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name); static ColumnsDescription getTableStructureFromData( const ObjectStoragePtr & object_storage, @@ -108,19 +117,15 @@ protected: ObjectInfos & read_keys, const ContextPtr & context); + ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; const std::string engine_name; - std::optional format_settings; + const std::optional format_settings; const ASTPtr partition_by; const bool distributed_processing; LoggerPtr log; - ObjectStoragePtr object_storage; - ConfigurationPtr configuration; std::mutex configuration_update_mutex; }; -using StorageS3 = StorageObjectStorage; -using StorageAzureBlob = StorageObjectStorage; -using StorageHDFS = StorageObjectStorage; - } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index f023bb068d4..72a35ae33eb 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB { @@ -24,47 +25,34 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -template -StorageObjectStorageCluster::StorageObjectStorageCluster( +StorageObjectStorageCluster::StorageObjectStorageCluster( const String & cluster_name_, - const Storage::ConfigurationPtr & configuration_, + ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, - const String & engine_name_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_) - : IStorageCluster(cluster_name_, - table_id_, - getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name))) - , engine_name(engine_name_) + : IStorageCluster( + cluster_name_, table_id_, getLogger(fmt::format("{}({})", 
configuration_->getEngineName(), table_id_.table_name))) , configuration{configuration_} , object_storage(object_storage_) { configuration->check(context_); - StorageInMemoryMetadata storage_metadata; + auto metadata = getStorageMetadata( + object_storage, configuration, columns_, constraints_, + {}/* format_settings */, ""/* comment */, context_); - if (columns_.empty()) - { - ColumnsDescription columns = Storage::getTableStructureFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); - storage_metadata.setColumns(columns); - } - else - { - if (configuration->format == "auto") - StorageS3::setFormatFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); - - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setInMemoryMetadata(std::move(metadata)); } -template -void StorageObjectStorageCluster::updateQueryToSendIfNeeded( +std::string StorageObjectStorageCluster::getName() const +{ + return configuration->getEngineName(); +} + +void StorageObjectStorageCluster::updateQueryToSendIfNeeded( ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) @@ -72,24 +60,32 @@ void StorageObjectStorageCluster::up ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected SELECT query from table function {}, got '{}'", - engine_name, queryToString(query)); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected SELECT query from table function {}, got '{}'", + configuration->getEngineName(), queryToString(query)); } - TableFunction::updateStructureAndFormatArgumentsIfNeeded( - expression_list->children, - storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), - configuration->format, - context); + ASTs & args = expression_list->children; + const auto & structure = storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(); + if (args.empty()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected empty list of arguments for {}Cluster table function", + configuration->getEngineName()); + } + + ASTPtr cluster_name_arg = args.front(); + args.erase(args.begin()); + configuration->addStructureAndFormatToArgs(args, structure, configuration->format, context); + args.insert(args.begin(), cluster_name_arg); } -template -RemoteQueryExecutor::Extension -StorageObjectStorageCluster::getTaskIteratorExtension( +RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { - const auto settings = StorageSettings::create(local_context->getSettingsRef()); + const auto settings = configuration->getQuerySettings(local_context); auto iterator = std::make_shared( object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match, @@ -106,17 +102,4 @@ StorageObjectStorageCluster::getTask return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } - -#if USE_AWS_S3 -template class StorageObjectStorageCluster; -#endif - -#if USE_AZURE_BLOB_STORAGE -template class StorageObjectStorageCluster; 
-#endif - -#if USE_HDFS -template class StorageObjectStorageCluster; -#endif - } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index ac894e14f24..2db8f5c352e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -11,32 +11,25 @@ namespace DB { -class StorageS3Settings; -class StorageAzureBlobSettings; - class Context; -template class StorageObjectStorageCluster : public IStorageCluster { public: - using Storage = StorageObjectStorage; - using TableFunction = TableFunctionObjectStorageCluster; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; StorageObjectStorageCluster( const String & cluster_name_, - const Storage::ConfigurationPtr & configuration_, + ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, - const String & engine_name_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_); - std::string getName() const override { return engine_name; } + std::string getName() const override; - RemoteQueryExecutor::Extension - getTaskIteratorExtension( + RemoteQueryExecutor::Extension getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & context) const override; @@ -53,20 +46,9 @@ private: const ContextPtr & context) override; const String engine_name; - const Storage::ConfigurationPtr configuration; + const StorageObjectStorage::ConfigurationPtr configuration; const ObjectStoragePtr object_storage; NamesAndTypesList virtual_columns; }; - -#if USE_AWS_S3 -using StorageS3Cluster = StorageObjectStorageCluster; -#endif -#if USE_AZURE_BLOB_STORAGE -using StorageAzureBlobCluster = StorageObjectStorageCluster; -#endif -#if USE_HDFS -using StorageHDFSCluster = StorageObjectStorageCluster; -#endif - } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 647575aaa90..34965174bf9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include "StorageObjectStorage.h" #include namespace fs = std::filesystem; @@ -27,6 +28,9 @@ public: ContextPtr local_context, bool with_table_structure); + virtual std::string getTypeName() const = 0; + virtual std::string getEngineName() const = 0; + virtual Path getPath() const = 0; virtual void setPath(const Path & path) = 0; @@ -36,6 +40,9 @@ public: virtual String getDataSourceDescription() = 0; virtual String getNamespace() const = 0; + virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; + virtual void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; bool withWildcard() const; bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h deleted file mode 100644 index 606456011c3..00000000000 --- a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h +++ /dev/null @@ -1,102 +0,0 @@ -#pragma once -#include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern 
const Metric ObjectStorageAzureThreadsScheduled; - - extern const Metric ObjectStorageS3Threads; - extern const Metric ObjectStorageS3ThreadsActive; - extern const Metric ObjectStorageS3ThreadsScheduled; -} - -namespace DB -{ - -struct StorageObjectStorageSettings -{ - bool truncate_on_insert; - bool create_new_file_on_insert; - bool schema_inference_use_cache; - SchemaInferenceMode schema_inference_mode; - bool skip_empty_files; - size_t list_object_keys_size; - bool throw_on_zero_files_match; - bool ignore_non_existent_file; -}; - -struct S3StorageSettings -{ - static StorageObjectStorageSettings create(const Settings & settings) - { - return StorageObjectStorageSettings{ - .truncate_on_insert = settings.s3_truncate_on_insert, - .create_new_file_on_insert = settings.s3_create_new_file_on_insert, - .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, - .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.s3_skip_empty_files, - .list_object_keys_size = settings.s3_list_object_keys_size, - .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, - .ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist, - }; - } - - static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_s3"; - - static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT -}; - -struct AzureStorageSettings -{ - static StorageObjectStorageSettings create(const Settings & settings) - { - return StorageObjectStorageSettings{ - .truncate_on_insert = settings.azure_truncate_on_insert, - .create_new_file_on_insert = settings.azure_create_new_file_on_insert, - .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, - .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure - .list_object_keys_size = settings.azure_list_object_keys_size, - .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, - .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, - }; - } - - static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_azure"; - - static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageAzureThreads; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageAzureThreadsActive; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageAzureThreadsScheduled; } /// NOLINT -}; - -struct HDFSStorageSettings -{ - static StorageObjectStorageSettings create(const Settings & settings) - { - return StorageObjectStorageSettings{ - .truncate_on_insert = settings.hdfs_truncate_on_insert, - .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, - .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, - .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs - .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs - 
.throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, - .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, - }; - } - - static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_hdfs"; - - /// TODO: s3 -> hdfs - static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT -}; - -} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 42371764920..62367a6b933 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -103,7 +103,6 @@ void StorageObjectStorageSink::release() PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, - const StorageObjectStorageSettings & query_settings_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -111,7 +110,7 @@ PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( : PartitionedSink(partition_by, context_, sample_block_) , object_storage(object_storage_) , configuration(configuration_) - , query_settings(query_settings_) + , query_settings(configuration_->getQuerySettings(context_)) , format_settings(format_settings_) , sample_block(sample_block_) , context(context_) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 38805332a35..6c2f73e40e3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include #include #include @@ -47,7 +46,6 @@ public: PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, - const StorageObjectStorageSettings & query_settings_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -61,7 +59,7 @@ private: ObjectStoragePtr object_storage; StorageObjectStorageConfigurationPtr configuration; - const StorageObjectStorageSettings query_settings; + const StorageObjectStorage::QuerySettings query_settings; const std::optional format_settings; const Block sample_block; const ContextPtr context; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 82824b0e7f7..3101a7ebf51 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -20,6 +19,13 @@ namespace ProfileEvents extern const Event EngineFileLikeReadFiles; } +namespace CurrentMetrics +{ + extern const Metric StorageObjectStorageThreads; + extern const Metric StorageObjectStorageThreadsActive; + extern const Metric StorageObjectStorageThreadsScheduled; +} + namespace DB { @@ -37,16 +43,12 @@ StorageObjectStorageSource::StorageObjectStorageSource( ConfigurationPtr configuration_, const ReadFromFormatInfo & info, std::optional format_settings_, - const 
StorageObjectStorageSettings & query_settings_, + const StorageObjectStorage::QuerySettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, bool need_only_count_, - SchemaCache & schema_cache_, - std::shared_ptr reader_pool_, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_) + SchemaCache & schema_cache_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ -57,13 +59,14 @@ StorageObjectStorageSource::StorageObjectStorageSource( , max_block_size(max_block_size_) , need_only_count(need_only_count_) , read_from_format_info(info) - , create_reader_pool(reader_pool_) + , create_reader_pool(std::make_shared( + CurrentMetrics::StorageObjectStorageThreads, + CurrentMetrics::StorageObjectStorageThreadsActive, + CurrentMetrics::StorageObjectStorageThreadsScheduled, + 1/* max_threads */)) , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(schema_cache_) - , metric_threads(metric_threads_) - , metric_threads_active(metric_threads_active_) - , metric_threads_scheduled(metric_threads_scheduled_) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) { } @@ -76,26 +79,23 @@ StorageObjectStorageSource::~StorageObjectStorageSource() std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, - const StorageObjectStorageSettings & settings, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_, std::function file_progress_callback) { if (distributed_processing) return std::make_shared( local_context->getReadTaskCallback(), - local_context->getSettingsRef().max_threads, - metric_threads_, metric_threads_active_, metric_threads_scheduled_); + local_context->getSettingsRef().max_threads); if (configuration->isNamespaceWithGlobs()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + auto settings = configuration->getQuerySettings(local_context); + if (configuration->isPathWithGlobs()) { /// Iterate through disclosed globs and make a source for each file @@ -568,7 +568,8 @@ StorageObjectStorageSource::ReaderHolder::ReaderHolder( { } -StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept +StorageObjectStorageSource::ReaderHolder & +StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept { /// The order of destruction is important. /// reader uses pipeline, pipeline uses read_buf. 
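[Editor's note] The destruction-order comment above is the heart of ReaderHolder's move assignment: the reader consumes the pipeline, and the pipeline consumes read_buf, so the old members must be torn down outermost-first before the moved-from state is adopted. A minimal, self-contained sketch of the pattern, using hypothetical Buffer/Pipeline/Reader stand-ins rather than the actual ClickHouse classes:

    #include <memory>
    #include <utility>

    struct Buffer {};
    struct Pipeline { Buffer * source = nullptr; };   // pulls bytes from the buffer
    struct Reader { Pipeline * pipe = nullptr; };     // pulls blocks from the pipeline

    struct ReaderHolder
    {
        /// Declared in dependency order: read_buf is built first, reader last.
        std::unique_ptr<Buffer> read_buf;
        std::unique_ptr<Pipeline> pipeline;
        std::unique_ptr<Reader> reader;

        ReaderHolder & operator=(ReaderHolder && other) noexcept
        {
            /// Assigning each unique_ptr destroys its old object immediately,
            /// so assign in reverse dependency order: reader first, buffer last.
            reader = std::move(other.reader);
            pipeline = std::move(other.pipeline);
            read_buf = std::move(other.read_buf);
            return *this;
        }
    };

Because a destructor already destroys members in reverse declaration order, declaring the members in dependency order gives correct teardown for free; only the hand-written move assignment needs the explicit ordering sketched above.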
@@ -581,15 +582,15 @@ StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHol } StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( - const ReadTaskCallback & callback_, - size_t max_threads_count, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_) + const ReadTaskCallback & callback_, size_t max_threads_count) : IIterator("ReadTaskIterator") , callback(callback_) { - ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); + ThreadPool pool( + CurrentMetrics::StorageObjectStorageThreads, + CurrentMetrics::StorageObjectStorageThreadsActive, + CurrentMetrics::StorageObjectStorageThreadsScheduled, max_threads_count); + auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "ReadTaskIter"); std::vector> keys; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index f75bfc390bb..3c2cc3f80cd 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -28,16 +27,12 @@ public: ConfigurationPtr configuration, const ReadFromFormatInfo & info, std::optional format_settings_, - const StorageObjectStorageSettings & query_settings_, + const StorageObjectStorage::QuerySettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, bool need_only_count_, - SchemaCache & schema_cache_, - std::shared_ptr reader_pool_, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_); + SchemaCache & schema_cache_); ~StorageObjectStorageSource() override; @@ -53,15 +48,11 @@ public: static std::shared_ptr createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, - const StorageObjectStorageSettings & settings, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_, std::function file_progress_callback = {}); protected: @@ -69,7 +60,7 @@ protected: ObjectStoragePtr object_storage; const ConfigurationPtr configuration; const std::optional format_settings; - const StorageObjectStorageSettings query_settings; + const StorageObjectStorage::QuerySettings query_settings; const UInt64 max_block_size; const bool need_only_count; const ReadFromFormatInfo read_from_format_info; @@ -79,10 +70,6 @@ protected: SchemaCache & schema_cache; bool initialized = false; - const CurrentMetrics::Metric metric_threads; - const CurrentMetrics::Metric metric_threads_active; - const CurrentMetrics::Metric metric_threads_scheduled; - size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); @@ -149,12 +136,7 @@ protected: class StorageObjectStorageSource::ReadTaskIterator : public IIterator { public: - ReadTaskIterator( - const ReadTaskCallback & callback_, - size_t max_threads_count, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_); + ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); size_t 
estimatedKeysCount() override { return buffer.size(); } diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index 9caab709081..94d6dadee3b 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -1,8 +1,6 @@ #include #include #include -#include - namespace DB { @@ -15,15 +13,15 @@ namespace ErrorCodes std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, const StorageObjectStorageConfiguration & configuration, - const StorageObjectStorageSettings & query_settings, + const StorageObjectStorage::QuerySettings & settings, const String & key, size_t sequence_number) { - if (query_settings.truncate_on_insert + if (settings.truncate_on_insert || !object_storage.exists(StoredObject(key))) return std::nullopt; - if (query_settings.create_new_file_on_insert) + if (settings.create_new_file_on_insert) { auto pos = key.find_first_of('.'); String new_key; @@ -45,4 +43,38 @@ std::optional checkAndGetNewFileOnInsertIfNeeded( configuration.getNamespace(), key); } +StorageInMemoryMetadata getStorageMetadata( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfigurationPtr & configuration, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints, + std::optional format_settings, + const String & comment, + const ContextPtr & context) +{ + StorageInMemoryMetadata storage_metadata; + if (columns.empty()) + { + auto fetched_columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context); + storage_metadata.setColumns(fetched_columns); + } + else if (!columns.hasOnlyOrdinary()) + { + /// We don't allow special columns. + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Special columns are not supported for {} storage " + "like MATERIALIZED, ALIAS or EPHEMERAL", configuration->getTypeName()); + } + else + { + if (configuration->format == "auto") + StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context); + + storage_metadata.setColumns(columns); + } + storage_metadata.setConstraints(constraints); + storage_metadata.setComment(comment); + return storage_metadata; +} + } diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index afc0f31a33f..37bd49a77c0 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -1,18 +1,30 @@ #pragma once #include +#include "StorageObjectStorage.h" namespace DB { class IObjectStorage; class StorageObjectStorageConfiguration; +using StorageObjectStorageConfigurationPtr = std::shared_ptr; struct StorageObjectStorageSettings; std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, const StorageObjectStorageConfiguration & configuration, - const StorageObjectStorageSettings & query_settings, + const StorageObjectStorage::QuerySettings & settings, const std::string & key, size_t sequence_number); + +StorageInMemoryMetadata getStorageMetadata( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfigurationPtr & configuration, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints, + std::optional format_settings, + const String & comment, + const ContextPtr & context); + } diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index 3271b766f68..06b8aefb716 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ 
b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -2,22 +2,23 @@ #include #include #include +#include #include #include namespace DB { +#if USE_AWS_S3 || USE_AZURE_BLOB_STORAGE || USE_HDFS + namespace ErrorCodes { extern const int BAD_ARGUMENTS; } -template -static std::shared_ptr> createStorageObjectStorage( +static std::shared_ptr createStorageObjectStorage( const StorageFactory::Arguments & args, - typename StorageObjectStorage::ConfigurationPtr configuration, - const String & engine_name, + typename StorageObjectStorage::ConfigurationPtr configuration, ContextPtr context) { auto & engine_args = args.engine_args; @@ -54,10 +55,9 @@ static std::shared_ptr> createStorageObjec if (args.storage_def->partition_by) partition_by = args.storage_def->partition_by->clone(); - return std::make_shared>( + return std::make_shared( configuration, configuration->createObjectStorage(context), - engine_name, args.getContext(), args.table_id, args.columns, @@ -68,6 +68,8 @@ static std::shared_ptr> createStorageObjec partition_by); } +#endif + #if USE_AZURE_BLOB_STORAGE void registerStorageAzure(StorageFactory & factory) { @@ -76,7 +78,7 @@ void registerStorageAzure(StorageFactory & factory) auto context = args.getLocalContext(); auto configuration = std::make_shared(); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, "Azure", context); + return createStorageObjectStorage(args, configuration, context); }, { .supports_settings = true, @@ -95,7 +97,7 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) auto context = args.getLocalContext(); auto configuration = std::make_shared(); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, name, context); + return createStorageObjectStorage(args, configuration, context); }, { .supports_settings = true, @@ -130,7 +132,7 @@ void registerStorageHDFS(StorageFactory & factory) auto context = args.getLocalContext(); auto configuration = std::make_shared(); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, "HDFS", context); + return createStorageObjectStorage(args, configuration, context); }, { .supports_settings = true, diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index c1b45108b36..5a1f0f6dd04 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -7,7 +7,6 @@ #include #include #include -#include #include @@ -21,7 +20,7 @@ struct ObjectMetadata; class StorageS3QueueSource : public ISource, WithContext { public: - using Storage = StorageObjectStorage; + using Storage = StorageObjectStorage; using ConfigurationPtr = Storage::ConfigurationPtr; using GlobIterator = StorageObjectStorageSource::GlobIterator; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index c5799d23abd..6b504b0d986 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -37,13 +37,6 @@ namespace ProfileEvents extern const Event S3ListObjects; } -namespace CurrentMetrics -{ - extern const Metric ObjectStorageS3Threads; - extern const Metric ObjectStorageS3ThreadsActive; - extern const Metric ObjectStorageS3ThreadsScheduled; -} - namespace DB { @@ -151,14 +144,14 @@ 
StorageS3Queue::StorageS3Queue( StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context_); + auto columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context_); storage_metadata.setColumns(columns); } else { if (configuration->format == "auto") { - StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_); + StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_); } storage_metadata.setColumns(columns_); } @@ -370,26 +363,18 @@ std::shared_ptr StorageS3Queue::createSource( size_t max_block_size, ContextPtr local_context) { - auto threadpool = std::make_shared(CurrentMetrics::ObjectStorageS3Threads, - CurrentMetrics::ObjectStorageS3ThreadsActive, - CurrentMetrics::ObjectStorageS3ThreadsScheduled, - /* max_threads */1); auto internal_source = std::make_unique( getName(), object_storage, configuration, info, format_settings, - S3StorageSettings::create(local_context->getSettingsRef()), + configuration->getQuerySettings(local_context), local_context, max_block_size, file_iterator, false, - Storage::getSchemaCache(local_context), - threadpool, - CurrentMetrics::ObjectStorageS3Threads, - CurrentMetrics::ObjectStorageS3ThreadsActive, - CurrentMetrics::ObjectStorageS3ThreadsScheduled); + StorageObjectStorage::getSchemaCache(local_context, configuration->getTypeName())); auto file_deleter = [=, this](const std::string & path) mutable { @@ -596,7 +581,7 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { - auto settings = S3StorageSettings::create(local_context->getSettingsRef()); + auto settings = configuration->getQuerySettings(local_context); auto glob_iterator = std::make_unique( object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match); diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 72c41a6a694..1464e15ebf2 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -21,8 +21,7 @@ class S3QueueFilesMetadata; class StorageS3Queue : public IStorage, WithContext { public: - using Storage = StorageObjectStorage; - using ConfigurationPtr = Storage::ConfigurationPtr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; StorageS3Queue( std::unique_ptr s3queue_settings_, diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 9ef64f2b90d..a2d3f342a63 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -9,6 +9,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -74,14 +77,14 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C { fillDataImpl(res_columns, StorageFile::getSchemaCache(context), "File"); #if USE_AWS_S3 - fillDataImpl(res_columns, StorageS3::getSchemaCache(context), "S3"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageS3Configuration::type_name), "S3"); #endif #if USE_HDFS - fillDataImpl(res_columns, StorageHDFS::getSchemaCache(context), "HDFS"); + 
fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageHDFSConfiguration::type_name), "HDFS"); #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); /// FIXME + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureBlobConfiguration::type_name), "Azure"); #endif } diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 8cbd855bb96..02c8c623e61 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -39,7 +39,7 @@ protected: columns = cached_columns; StoragePtr storage = Storage::create( - configuration, context, "", StorageID(TableFunction::getDatabaseName(), table_name), + configuration, context, StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt, LoadingStrictnessLevel::CREATE); storage->startup(); diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 9223642a7e6..2b5c774ff78 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -27,27 +27,27 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -template +template ObjectStoragePtr TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const + Definition, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const { if (!object_storage) object_storage = configuration->createObjectStorage(context, create_readonly); return object_storage; } -template +template StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::getConfiguration() const + Definition, Configuration>::getConfiguration() const { if (!configuration) configuration = std::make_shared(); return configuration; } -template +template std::vector TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const + Definition, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const { auto & table_function_node = query_node_table_function->as(); auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); @@ -63,22 +63,21 @@ std::vector TableFunctionObjectStorage< return result; } -template -void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( +template +void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( ASTs & args, const String & structure, const String & format, const ContextPtr & context) { - Configuration::addStructureAndFormatToArgs(args, structure, format, context); + Configuration().addStructureAndFormatToArgs(args, structure, format, context); } -template -void TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) +template +void TableFunctionObjectStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { StorageObjectStorageConfiguration::initialize(*getConfiguration(), engine_args, local_context, true); } -template -void 
TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) +template +void TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) { /// Clone ast function, because we can modify its arguments like removing headers. auto ast_copy = ast_function->clone(); @@ -90,38 +89,38 @@ void TableFunctionObjectStorage::par parseArgumentsImpl(args, context); } -template +template ColumnsDescription TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const + Definition, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const { chassert(configuration); if (configuration->structure == "auto") { context->checkAccess(getSourceAccessType()); auto storage = getObjectStorage(context, !is_insert_query); - return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context); + return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context); } return parseColumnsListFromString(configuration->structure, context); } -template +template bool TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) + Definition, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) { chassert(configuration); return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); } -template +template std::unordered_set TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const + Definition, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const { return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } -template -StoragePtr TableFunctionObjectStorage::executeImpl( +template +StoragePtr TableFunctionObjectStorage::executeImpl( const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, @@ -137,10 +136,9 @@ StoragePtr TableFunctionObjectStorage>( + StoragePtr storage = std::make_shared( configuration, getObjectStorage(context, !is_insert_query), - Definition::storage_type_name, context, StorageID(getDatabaseName(), table_name), columns, @@ -159,7 +157,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) { UNUSED(factory); #if USE_AWS_S3 - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -170,7 +168,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) .allow_readonly = false }); - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -181,7 +179,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) .allow_readonly = false }); - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -191,7 +189,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) .categories{"DataLake"}}, .allow_readonly = false }); - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -204,7 +202,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) #endif #if USE_AZURE_BLOB_STORAGE - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -220,7 +218,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) }); #endif #if USE_HDFS - factory.registerFunction>( + factory.registerFunction>( { .allow_readonly = 
false }); @@ -228,21 +226,21 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) } #if USE_AZURE_BLOB_STORAGE -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; #endif #if USE_AWS_S3 -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; #endif #if USE_HDFS -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; #endif } diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index 9022f6e577f..bd43cae3697 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -85,7 +85,7 @@ struct HDFSDefinition " - uri, format, structure, compression_method\n"; }; -template +template class TableFunctionObjectStorage : public ITableFunction { public: @@ -142,14 +142,14 @@ protected: }; #if USE_AWS_S3 -using TableFunctionS3 = TableFunctionObjectStorage; +using TableFunctionS3 = TableFunctionObjectStorage; #endif #if USE_AZURE_BLOB_STORAGE -using TableFunctionAzureBlob = TableFunctionObjectStorage; +using TableFunctionAzureBlob = TableFunctionObjectStorage; #endif #if USE_HDFS -using TableFunctionHDFS = TableFunctionObjectStorage; +using TableFunctionHDFS = TableFunctionObjectStorage; #endif } diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 909ace788eb..ce78076dd21 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -14,8 +14,8 @@ namespace DB { -template -StoragePtr TableFunctionObjectStorageCluster::executeImpl( +template +StoragePtr TableFunctionObjectStorageCluster::executeImpl( const ASTPtr & /*function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const { @@ -34,10 +34,9 @@ StoragePtr TableFunctionObjectStorageClustergetClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) { /// On worker node this filename won't contains globs - storage = std::make_shared>( + storage = std::make_shared( configuration, object_storage, - Definition::storage_type_name, context, StorageID(Base::getDatabaseName(), table_name), columns, @@ -49,11 +48,10 @@ StoragePtr TableFunctionObjectStorageCluster>( + storage = std::make_shared( ITableFunctionCluster::cluster_name, configuration, object_storage, - Definition::storage_type_name, StorageID(Base::getDatabaseName(), table_name), columns, ConstraintsDescription{}, @@ -107,14 +105,14 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) } #if USE_AWS_S3 -template class TableFunctionObjectStorageCluster; +template class TableFunctionObjectStorageCluster; #endif #if USE_AZURE_BLOB_STORAGE -template class TableFunctionObjectStorageCluster; +template class TableFunctionObjectStorageCluster; #endif #if USE_HDFS -template class 
TableFunctionObjectStorageCluster; +template class TableFunctionObjectStorageCluster; #endif } diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h index 21c2f8995dc..a8bc11b5e40 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.h +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -56,8 +56,8 @@ struct HDFSClusterDefinition " - cluster_name, uri, format, structure, compression_method\n"; }; -template -class TableFunctionObjectStorageCluster : public ITableFunctionCluster> +template +class TableFunctionObjectStorageCluster : public ITableFunctionCluster> { public: static constexpr auto name = Definition::name; @@ -67,7 +67,7 @@ public: String getSignature() const override { return signature; } protected: - using Base = TableFunctionObjectStorage; + using Base = TableFunctionObjectStorage; StoragePtr executeImpl( const ASTPtr & ast_function, @@ -86,14 +86,14 @@ protected: }; #if USE_AWS_S3 -using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; +using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; #endif #if USE_AZURE_BLOB_STORAGE -using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; +using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; #endif #if USE_HDFS -using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster; +using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster; #endif } From 9eb9a76592dada103c40baa2c4acf5a3918b8e95 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 22 Apr 2024 14:18:46 +0100 Subject: [PATCH 093/651] Fix --- .../ObjectStorage/AzureBlob/Configuration.cpp | 1 + .../DataLakes/IStorageDataLake.h | 2 +- .../ObjectStorage/HDFS/Configuration.cpp | 1 + .../ObjectStorage/S3/Configuration.cpp | 1 + .../ObjectStorage/StorageObjectStorage.cpp | 47 +++++++++++-------- .../ObjectStorage/StorageObjectStorage.h | 10 +++- .../StorageObjectStorageCluster.cpp | 9 ++-- .../StorageObjectStorageConfiguration.cpp | 5 ++ .../StorageObjectStorageConfiguration.h | 2 +- src/Storages/ObjectStorage/Utils.cpp | 33 ++++++------- src/Storages/ObjectStorage/Utils.h | 10 ++-- src/Storages/S3Queue/StorageS3Queue.cpp | 21 +++------ .../TableFunctionObjectStorage.cpp | 5 +- 13 files changed, 80 insertions(+), 67 deletions(-) diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index c9bc59d62aa..f268b812c03 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -77,6 +77,7 @@ void StorageAzureBlobConfiguration::check(ContextPtr context) const url_to_check = Poco::URI(connection_url); context->getGlobalContext()->getRemoteHostFilter().checkURL(url_to_check); + StorageObjectStorageConfiguration::check(context); } StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index e1851775925..144cc16939c 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -89,7 +89,7 @@ public: { ConfigurationPtr configuration = base_configuration->clone(); configuration->getPaths() = metadata->getDataFiles(); - return Storage::getTableStructureFromData( + return Storage::resolveSchemaFromData( object_storage_, configuration, format_settings_, 
local_context); } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 0062ac969ac..12e3f3adb12 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -34,6 +34,7 @@ void StorageHDFSConfiguration::check(ContextPtr context) const { context->getRemoteHostFilter().checkURL(Poco::URI(url)); checkHDFSURL(fs::path(url) / path.substr(1)); + StorageObjectStorageConfiguration::check(context); } ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 139d9004f8e..bfd61c647f8 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -54,6 +54,7 @@ void StorageS3Configuration::check(ContextPtr context) const validateNamespace(url.bucket); context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); + StorageObjectStorageConfiguration::check(context); } void StorageS3Configuration::validateNamespace(const String & name) const diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 441639629a3..36a8beba41a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -47,17 +47,19 @@ StorageObjectStorage::StorageObjectStorage( , distributed_processing(distributed_processing_) , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName()))) { - FormatFactory::instance().checkFormatName(configuration->format); + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context); configuration->check(context); + StorageInMemoryMetadata metadata; + metadata.setColumns(columns); + metadata.setConstraints(constraints_); + metadata.setComment(comment); + StoredObjects objects; for (const auto & key : configuration->getPaths()) objects.emplace_back(key); - auto metadata = getStorageMetadata( - object_storage_, configuration_, columns_, - constraints_, format_settings_, comment, context); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); setInMemoryMetadata(std::move(metadata)); } @@ -224,7 +226,7 @@ std::unique_ptr StorageObjectStorage::createReadBufferIterat format_settings, getSchemaCache(context, configuration->getTypeName()), read_keys, context); } -ColumnsDescription StorageObjectStorage::getTableStructureFromData( +ColumnsDescription StorageObjectStorage::resolveSchemaFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, @@ -233,20 +235,11 @@ ColumnsDescription StorageObjectStorage::getTableStructureFromData( ObjectInfos read_keys; auto read_buffer_iterator = createReadBufferIterator( object_storage, configuration, format_settings, read_keys, context); - - if (configuration->format == "auto") - { - auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); - configuration->format = format; - return columns; - } - else - { - return readSchemaFromFormat(configuration->format, format_settings, *read_buffer_iterator, context); - } + return readSchemaFromFormat( + 
configuration->format, format_settings, *read_buffer_iterator, context); } -void StorageObjectStorage::setFormatFromData( +std::string StorageObjectStorage::resolveFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, @@ -255,7 +248,23 @@ void StorageObjectStorage::setFormatFromData( ObjectInfos read_keys; auto read_buffer_iterator = createReadBufferIterator( object_storage, configuration, format_settings, read_keys, context); - configuration->format = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context).second; + return detectFormatAndReadSchema( + format_settings, *read_buffer_iterator, context).second; +} + +std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto read_buffer_iterator = createReadBufferIterator( + object_storage, configuration, format_settings, read_keys, context); + + auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); + configuration->format = format; + return std::pair(columns, format); } SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 3dbe010e406..d46a875bf42 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -95,13 +95,19 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name); - static ColumnsDescription getTableStructureFromData( + static ColumnsDescription resolveSchemaFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, const ContextPtr & context); - static void setFormatFromData( + static std::string resolveFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); + + static std::pair resolveSchemaAndFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 72a35ae33eb..f98fc32a3cc 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -38,10 +38,13 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( , configuration{configuration_} , object_storage(object_storage_) { + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, context_); configuration->check(context_); - auto metadata = getStorageMetadata( - object_storage, configuration, columns_, constraints_, - {}/* format_settings */, ""/* comment */, context_); + + StorageInMemoryMetadata metadata; + metadata.setColumns(columns); + metadata.setConstraints(constraints_); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); setInMemoryMetadata(std::move(metadata)); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp 
b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 61e569cee05..3635269db34 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -30,6 +30,11 @@ void StorageObjectStorageConfiguration::initialize( configuration.initialized = true; } +void StorageObjectStorageConfiguration::check(ContextPtr) const +{ + FormatFactory::instance().checkFormatName(format); +} + StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other) { format = other.format; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 34965174bf9..c55362aa8bd 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -50,7 +50,7 @@ public: bool isNamespaceWithGlobs() const; virtual std::string getPathWithoutGlob() const; - virtual void check(ContextPtr context) const = 0; + virtual void check(ContextPtr context) const; virtual void validateNamespace(const String & /* name */) const {} virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index 94d6dadee3b..2a7236ab196 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -12,7 +12,7 @@ namespace ErrorCodes std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const StorageObjectStorage::QuerySettings & settings, const String & key, size_t sequence_number) @@ -43,38 +43,33 @@ std::optional checkAndGetNewFileOnInsertIfNeeded( configuration.getNamespace(), key); } -StorageInMemoryMetadata getStorageMetadata( +void resolveSchemaAndFormat( + ColumnsDescription & columns, + std::string & format, ObjectStoragePtr object_storage, const StorageObjectStorageConfigurationPtr & configuration, - const ColumnsDescription & columns, - const ConstraintsDescription & constraints, std::optional format_settings, - const String & comment, const ContextPtr & context) { - StorageInMemoryMetadata storage_metadata; if (columns.empty()) { - auto fetched_columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context); - storage_metadata.setColumns(fetched_columns); + if (format == "auto") + std::tie(columns, format) = StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + else + columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); } - else if (!columns.hasOnlyOrdinary()) + else if (format == "auto") + { + format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, context); + } + + if (!columns.hasOnlyOrdinary()) { /// We don't allow special columns. 
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Special columns are not supported for {} storage" "like MATERIALIZED, ALIAS or EPHEMERAL", configuration->getTypeName()); } - else - { - if (configuration->format == "auto") - StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context); - - storage_metadata.setColumns(columns); - } - storage_metadata.setConstraints(constraints); - storage_metadata.setComment(comment); - return storage_metadata; } } diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 37bd49a77c0..3a752e6b8f0 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -12,19 +12,17 @@ struct StorageObjectStorageSettings; std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const StorageObjectStorage::QuerySettings & settings, const std::string & key, size_t sequence_number); - -StorageInMemoryMetadata getStorageMetadata( +void resolveSchemaAndFormat( + ColumnsDescription & columns, + std::string & format, ObjectStoragePtr object_storage, const StorageObjectStorageConfigurationPtr & configuration, - const ColumnsDescription & columns, - const ConstraintsDescription & constraints, std::optional format_settings, - const String & comment, const ContextPtr & context); } diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 6b504b0d986..229c40396c5 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -141,24 +142,14 @@ StorageS3Queue::StorageS3Queue( FormatFactory::instance().checkFormatName(configuration->format); configuration->check(context_); - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - auto columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context_); - storage_metadata.setColumns(columns); - } - else - { - if (configuration->format == "auto") - { - StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_); - } - storage_metadata.setColumns(columns_); - } + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context_); + configuration->check(context_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 2b5c774ff78..06676a8adfa 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -98,7 +99,9 @@ ColumnsDescription TableFunctionObjectStorage< { context->checkAccess(getSourceAccessType()); auto storage = getObjectStorage(context, !is_insert_query); - return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context); + ColumnsDescription 
    columns;
+    resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context);
+    return columns;
     }
 
     return parseColumnsListFromString(configuration->structure, context);

From 02239dd9f92894ebccc4a01f8386ac393b7985c7 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 25 Apr 2024 11:21:22 +0000
Subject: [PATCH 094/651] Compare aliases for TableNode

---
 src/Analyzer/TableNode.cpp                      |  2 +-
 .../03130_analyzer_self_join_group_by.reference |  6 ++++++
 .../03130_analyzer_self_join_group_by.sql       | 16 ++++++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/03130_analyzer_self_join_group_by.reference
 create mode 100644 tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql

diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp
index daf5db08551..11d1a280a56 100644
--- a/src/Analyzer/TableNode.cpp
+++ b/src/Analyzer/TableNode.cpp
@@ -56,7 +56,7 @@ bool TableNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const
 {
     const auto & rhs_typed = assert_cast<const TableNode &>(rhs);
     return storage_id == rhs_typed.storage_id && table_expression_modifiers == rhs_typed.table_expression_modifiers &&
-           temporary_table_name == rhs_typed.temporary_table_name;
+           temporary_table_name == rhs_typed.temporary_table_name && getAlias() == rhs_typed.getAlias();
 }
 
 void TableNode::updateTreeHashImpl(HashState & state, CompareOptions) const
diff --git a/tests/queries/0_stateless/03130_analyzer_self_join_group_by.reference b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.reference
new file mode 100644
index 00000000000..095df5749cd
--- /dev/null
+++ b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.reference
@@ -0,0 +1,6 @@
+1
+2
+3
+1 1
+2 2
+3 3
diff --git a/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql
new file mode 100644
index 00000000000..562855ad954
--- /dev/null
+++ b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql
@@ -0,0 +1,16 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (x Int32) ENGINE = MergeTree ORDER BY x;
+INSERT INTO t1 VALUES (1), (2), (3);
+
+SET allow_experimental_analyzer = 1;
+
+SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; -- { serverError NOT_AN_AGGREGATE }
+SELECT t2.x FROM t1 as t0 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; -- { serverError NOT_AN_AGGREGATE }
+SELECT t2.x FROM t1 as t0 JOIN t1 as t2 ON t0.x = t2.x GROUP BY t0.x; -- { serverError NOT_AN_AGGREGATE }
+SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY x; -- { serverError NOT_AN_AGGREGATE }
+SELECT t1.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE }
+SELECT x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE }
+SELECT x FROM t1 JOIN t1 as t2 USING (x) GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE }
+
+SELECT t1.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY x ORDER BY ALL;
+SELECT x, sum(t2.x) FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x ORDER BY ALL;

From 16bc8aa0b1a68bd2422026ea7205a3918b8e95 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Thu, 25 Apr 2024 16:08:13 +0200
Subject: [PATCH 095/651] Fix

---
 .../ObjectStorage/StorageObjectStorageConfiguration.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp
index 3635269db34..89c15085274 100644
---
a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -26,7 +26,6 @@ void StorageObjectStorageConfiguration::initialize( else FormatFactory::instance().checkFormatName(configuration.format); - configuration.check(local_context); configuration.initialized = true; } From 193ff63f87a2cef958983b2ef106a7c52f6db8be Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 25 Apr 2024 22:44:12 +0200 Subject: [PATCH 096/651] Fix --- .../ObjectStorage/StorageObjectStorage.cpp | 37 ++++++++++++++----- src/Storages/S3Queue/StorageS3Queue.cpp | 1 + 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 36a8beba41a..f5bfb9d2a65 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -269,20 +269,37 @@ std::pair StorageObjectStorage::resolveSchemaAn SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) { - static SchemaCache schema_cache( - context->getConfigRef().getUInt( - "schema_inference_cache_max_elements_for_" + configuration->getTypeName(), - DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; + return getSchemaCache(context, configuration->getTypeName()); } SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) { - static SchemaCache schema_cache( - context->getConfigRef().getUInt( - "schema_inference_cache_max_elements_for_" + storage_type_name, - DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; + if (storage_type_name == "s3") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_s3", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else if (storage_type_name == "hdfs") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_hdfs", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else if (storage_type_name == "azure") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_azure", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name); } } diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 229c40396c5..e84dabecf3b 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -151,6 +151,7 @@ StorageS3Queue::StorageS3Queue( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); From 69a3aa7bcf0e7a2d311a076493715cf3b1b3a418 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 26 Apr 2024 11:01:32 +0000 Subject: [PATCH 097/651] Implement Dynamic data type --- docs/en/sql-reference/data-types/dynamic.md | 157 ++++ src/Columns/ColumnArray.cpp | 10 + src/Columns/ColumnArray.h | 3 + src/Columns/ColumnCompressed.h | 3 + src/Columns/ColumnConst.cpp | 9 + src/Columns/ColumnConst.h | 4 + 
src/Columns/ColumnDynamic.cpp | 785 ++++++++++++++++++ src/Columns/ColumnDynamic.h | 363 ++++++++ src/Columns/ColumnMap.cpp | 9 + src/Columns/ColumnMap.h | 3 + src/Columns/ColumnNullable.cpp | 9 + src/Columns/ColumnNullable.h | 3 + src/Columns/ColumnSparse.cpp | 9 + src/Columns/ColumnSparse.h | 3 + src/Columns/ColumnTuple.cpp | 28 + src/Columns/ColumnTuple.h | 3 + src/Columns/ColumnVariant.cpp | 185 ++++- src/Columns/ColumnVariant.h | 34 +- src/Columns/IColumn.cpp | 2 + src/Columns/IColumn.h | 2 + src/Columns/tests/gtest_column_dynamic.cpp | 652 +++++++++++++++ src/Core/Settings.h | 2 + src/Core/TypeId.h | 1 + src/DataTypes/DataTypeArray.cpp | 21 + src/DataTypes/DataTypeArray.h | 7 +- src/DataTypes/DataTypeDynamic.cpp | 144 ++++ src/DataTypes/DataTypeDynamic.h | 53 ++ src/DataTypes/DataTypeFactory.cpp | 1 + src/DataTypes/DataTypeFactory.h | 1 + src/DataTypes/DataTypeMap.h | 2 +- src/DataTypes/DataTypeObject.h | 2 +- src/DataTypes/DataTypeTuple.cpp | 4 +- src/DataTypes/DataTypeTuple.h | 2 +- src/DataTypes/DataTypeVariant.cpp | 23 +- src/DataTypes/DataTypeVariant.h | 4 +- src/DataTypes/IDataType.cpp | 71 +- src/DataTypes/IDataType.h | 30 +- src/DataTypes/ObjectUtils.cpp | 12 +- src/DataTypes/ObjectUtils.h | 4 +- .../Serializations/ISerialization.cpp | 19 + src/DataTypes/Serializations/ISerialization.h | 21 +- .../Serializations/SerializationArray.cpp | 5 +- .../Serializations/SerializationArray.h | 4 +- .../Serializations/SerializationDynamic.cpp | 645 ++++++++++++++ .../Serializations/SerializationDynamic.h | 116 +++ .../SerializationDynamicElement.cpp | 99 +++ .../SerializationDynamicElement.h | 58 ++ .../Serializations/SerializationInterval.cpp | 4 +- .../Serializations/SerializationInterval.h | 5 +- .../SerializationLowCardinality.cpp | 3 +- .../SerializationLowCardinality.h | 3 +- .../Serializations/SerializationMap.cpp | 5 +- .../Serializations/SerializationMap.h | 3 +- .../Serializations/SerializationNamed.cpp | 5 +- .../Serializations/SerializationNamed.h | 3 +- .../Serializations/SerializationNullable.cpp | 5 +- .../Serializations/SerializationNullable.h | 3 +- .../Serializations/SerializationObject.cpp | 5 +- .../Serializations/SerializationObject.h | 3 +- .../Serializations/SerializationSparse.cpp | 7 +- .../Serializations/SerializationSparse.h | 3 +- .../Serializations/SerializationTuple.cpp | 5 +- .../Serializations/SerializationTuple.h | 3 +- .../Serializations/SerializationVariant.cpp | 5 +- .../Serializations/SerializationVariant.h | 3 +- .../SerializationVariantElement.cpp | 28 +- .../SerializationVariantElement.h | 14 +- .../Serializations/SerializationWrapper.cpp | 5 +- .../Serializations/SerializationWrapper.h | 3 +- .../tests/gtest_object_serialization.cpp | 2 +- src/DataTypes/Utils.cpp | 1 + src/Databases/DatabaseReplicated.cpp | 1 + src/Formats/FormatSettings.h | 6 +- src/Formats/NativeReader.cpp | 2 +- src/Functions/FunctionsConversion.cpp | 356 +++++++- src/Functions/dynamicElement.cpp | 172 ++++ src/Functions/dynamicType.cpp | 104 +++ src/Functions/if.cpp | 11 + src/Functions/isNotNull.cpp | 6 +- src/Functions/isNull.cpp | 6 +- src/Functions/variantElement.cpp | 52 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 34 +- src/Interpreters/convertFieldToType.cpp | 11 +- .../parseColumnsListForTableFunction.cpp | 14 +- .../parseColumnsListForTableFunction.h | 2 + src/Parsers/ParserDataType.cpp | 46 +- src/Processors/Formats/IOutputFormat.h | 3 +- 
.../Algorithms/AggregatingSortedAlgorithm.cpp | 39 +- .../Algorithms/AggregatingSortedAlgorithm.h | 3 +- .../Algorithms/CollapsingSortedAlgorithm.cpp | 19 +- .../Algorithms/CollapsingSortedAlgorithm.h | 2 - .../GraphiteRollupSortedAlgorithm.cpp | 28 +- .../GraphiteRollupSortedAlgorithm.h | 4 +- .../IMergingAlgorithmWithDelayedChunk.h | 2 +- .../IMergingAlgorithmWithSharedChunks.cpp | 5 +- .../IMergingAlgorithmWithSharedChunks.h | 6 +- src/Processors/Merges/Algorithms/MergedData.h | 42 +- .../Algorithms/MergingSortedAlgorithm.cpp | 3 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 17 +- .../Algorithms/ReplacingSortedAlgorithm.h | 2 - .../Algorithms/SummingSortedAlgorithm.cpp | 76 +- .../Algorithms/SummingSortedAlgorithm.h | 4 +- .../VersionedCollapsingAlgorithm.cpp | 15 +- .../Algorithms/VersionedCollapsingAlgorithm.h | 2 - .../Transforms/ColumnGathererTransform.cpp | 34 +- src/Storages/AlterCommands.cpp | 6 +- src/Storages/ColumnsDescription.cpp | 36 +- src/Storages/HDFS/StorageHDFS.h | 2 + src/Storages/HDFS/StorageHDFSCluster.h | 2 + src/Storages/IStorage.h | 4 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 30 + src/Storages/MergeTree/IMergeTreeDataPart.h | 2 + src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.h | 1 + .../MergeTreeDataPartWriterCompact.cpp | 17 +- .../MergeTreeDataPartWriterCompact.h | 4 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 54 +- .../MergeTree/MergeTreeDataPartWriterWide.h | 14 +- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- .../MergeTree/MergeTreeReaderCompact.cpp | 5 +- .../MergeTree/MergeTreeReaderWide.cpp | 107 ++- src/Storages/MergeTree/MergeTreeReaderWide.h | 38 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/MergeTree/MutateTask.cpp | 16 +- .../MergeTree/StorageFromMergeTreeDataPart.h | 1 + src/Storages/S3Queue/StorageS3Queue.h | 1 + src/Storages/StorageAzureBlob.h | 2 + src/Storages/StorageAzureBlobCluster.h | 2 + src/Storages/StorageBuffer.h | 2 + src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageDistributed.h | 1 + src/Storages/StorageDummy.h | 1 + src/Storages/StorageFile.h | 2 + src/Storages/StorageFileCluster.h | 2 + src/Storages/StorageInMemoryMetadata.cpp | 6 +- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageMaterializedView.h | 1 + src/Storages/StorageMemory.h | 1 + src/Storages/StorageMerge.h | 1 + src/Storages/StorageNull.h | 2 + src/Storages/StorageS3.h | 2 + src/Storages/StorageS3Cluster.h | 2 + src/Storages/StorageSnapshot.cpp | 2 +- src/Storages/StorageURL.h | 2 + src/Storages/StorageURLCluster.h | 2 + src/Storages/getStructureOfRemoteTable.cpp | 2 +- .../02943_variant_read_subcolumns.sh | 2 +- ...03033_dynamic_text_serialization.reference | 55 ++ .../03033_dynamic_text_serialization.sql | 74 ++ .../03034_dynamic_conversions.reference | 63 ++ .../0_stateless/03034_dynamic_conversions.sql | 24 + .../03035_dynamic_sorting.reference | 299 +++++++ .../0_stateless/03035_dynamic_sorting.sql | 80 ++ .../03036_dynamic_read_subcolumns.reference | 57 ++ .../03036_dynamic_read_subcolumns.sh | 62 ++ .../03037_dynamic_merges_1.reference | 120 +++ .../0_stateless/03037_dynamic_merges_1.sh | 61 ++ .../0_stateless/03037_dynamic_merges_2.sh | 45 + .../03038_nested_dynamic_merges.reference | 92 ++ .../03038_nested_dynamic_merges.sh | 53 ++ ...9_dynamic_all_merge_algorithms_1.reference | 88 ++ .../03039_dynamic_all_merge_algorithms_1.sh | 65 ++ ...9_dynamic_all_merge_algorithms_2.reference | 44 + 
 .../03039_dynamic_all_merge_algorithms_2.sh |  50 ++
 .../03040_dynamic_type_alters.reference     | 526 ++++++++++++
 .../0_stateless/03040_dynamic_type_alters.sh |  76 ++
 169 files changed, 6770 insertions(+), 438 deletions(-)
 create mode 100644 docs/en/sql-reference/data-types/dynamic.md
 create mode 100644 src/Columns/ColumnDynamic.cpp
 create mode 100644 src/Columns/ColumnDynamic.h
 create mode 100644 src/Columns/tests/gtest_column_dynamic.cpp
 create mode 100644 src/DataTypes/DataTypeDynamic.cpp
 create mode 100644 src/DataTypes/DataTypeDynamic.h
 create mode 100644 src/DataTypes/Serializations/SerializationDynamic.cpp
 create mode 100644 src/DataTypes/Serializations/SerializationDynamic.h
 create mode 100644 src/DataTypes/Serializations/SerializationDynamicElement.cpp
 create mode 100644 src/DataTypes/Serializations/SerializationDynamicElement.h
 create mode 100644 src/Functions/dynamicElement.cpp
 create mode 100644 src/Functions/dynamicType.cpp
 create mode 100644 tests/queries/0_stateless/03033_dynamic_text_serialization.reference
 create mode 100644 tests/queries/0_stateless/03033_dynamic_text_serialization.sql
 create mode 100644 tests/queries/0_stateless/03034_dynamic_conversions.reference
 create mode 100644 tests/queries/0_stateless/03034_dynamic_conversions.sql
 create mode 100644 tests/queries/0_stateless/03035_dynamic_sorting.reference
 create mode 100644 tests/queries/0_stateless/03035_dynamic_sorting.sql
 create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference
 create mode 100755 tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh
 create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1.reference
 create mode 100755 tests/queries/0_stateless/03037_dynamic_merges_1.sh
 create mode 100755 tests/queries/0_stateless/03037_dynamic_merges_2.sh
 create mode 100644 tests/queries/0_stateless/03038_nested_dynamic_merges.reference
 create mode 100755 tests/queries/0_stateless/03038_nested_dynamic_merges.sh
 create mode 100644 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference
 create mode 100755 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh
 create mode 100644 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference
 create mode 100755 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh
 create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters.reference
 create mode 100755 tests/queries/0_stateless/03040_dynamic_type_alters.sh

diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md
new file mode 100644
index 00000000000..e20bdad1e79
--- /dev/null
+++ b/docs/en/sql-reference/data-types/dynamic.md
@@ -0,0 +1,157 @@
+---
+slug: /en/sql-reference/data-types/dynamic
+sidebar_position: 56
+sidebar_label: Dynamic
+---
+
+# Dynamic
+
+This type allows storing values of any type inside it without knowing all of them in advance.
+
+To declare a column of `Dynamic` type, use the following syntax:
+
+``` sql
+ Dynamic(max_types=N)
+```
+
+Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic`. If this limit is exceeded, all values of new types will be converted to type `String`. The default value of `max_types` is `32`.
+
+:::note
+The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`.
+:::
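+
+For example, with `Dynamic(max_types=2)` a third distinct type no longer fits, so its values are stored as `String`. The following is a small sketch of that behavior based on the limit description above (the table name is illustrative):
+
+```sql
+CREATE TABLE test_limit (d Dynamic(max_types=2)) ENGINE = Memory;
+-- Int64 and String occupy the two available types, so the array below
+-- is expected to be stored as the String value '[1,2,3]'.
+INSERT INTO test_limit VALUES (42), ('Hello'), ([1, 2, 3]);
+SELECT d, dynamicType(d) FROM test_limit;
+```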
+
+## Creating Dynamic
+
+Using `Dynamic` type in a table column definition:
+
+```sql
+CREATE TABLE test (d Dynamic) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
+SELECT d, dynamicType(d) FROM test;
+```
+
+```text
+┌─d─────────────┬─dynamicType(d)─┐
+│ ᴺᵁᴸᴸ          │ None           │
+│ 42            │ Int64          │
+│ Hello, World! │ String         │
+│ [1,2,3]       │ Array(Int64)   │
+└───────────────┴────────────────┘
+```
+
+Using CAST from an ordinary column:
+
+```sql
+SELECT 'Hello, World!'::Dynamic as d, dynamicType(d);
+```
+
+```text
+┌─d─────────────┬─dynamicType(d)─┐
+│ Hello, World! │ String         │
+└───────────────┴────────────────┘
+```
+
+Using CAST from a `Variant` column:
+
+```sql
+SET allow_experimental_variant_type = 1, use_variant_as_common_type = 1;
+SELECT multiIf((number % 3) = 0, number, (number % 3) = 1, range(number + 1), NULL)::Dynamic AS d, dynamicType(d) FROM numbers(3);
+```
+
+```text
+┌─d─────┬─dynamicType(d)─┐
+│ 0     │ UInt64         │
+│ [0,1] │ Array(UInt64)  │
+│ ᴺᵁᴸᴸ  │ None           │
+└───────┴────────────────┘
+```
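+
+`Dynamic` values can be `NULL`, and the usual `NULL`-handling functions work on them. A short sketch, assuming the `test` table from the first example above:
+
+```sql
+SELECT d, dynamicType(d), isNull(d), isNotNull(d) FROM test;
+-- the row holding NULL is expected to report dynamicType(d) = 'None' and isNull(d) = 1
+```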
+
+## Reading Dynamic nested types as subcolumns
+
+`Dynamic` type supports reading a single nested type from a `Dynamic` column using the type name as a subcolumn.
+So, if you have a column `d Dynamic`, you can read a subcolumn of any valid type `T` using the syntax `d.T`.
+This subcolumn has type `Nullable(T)` if `T` can be placed inside `Nullable`, and `T` otherwise. It has the same size
+as the original `Dynamic` column and contains `NULL` values (or empty values if `T` cannot be placed inside `Nullable`)
+in all rows in which the original `Dynamic` column does not hold type `T`.
+
+`Dynamic` subcolumns can also be read using the function `dynamicElement(dynamic_column, type_name)`.
+
+Examples:
+
+```sql
+CREATE TABLE test (d Dynamic) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
+SELECT d, dynamicType(d), d.String, d.Int64, d.`Array(Int64)`, d.Date, d.`Array(String)` FROM test;
+```
+
+```text
+┌─d─────────────┬─dynamicType(d)─┬─d.String──────┬─d.Int64─┬─d.Array(Int64)─┬─d.Date─┬─d.Array(String)─┐
+│ ᴺᵁᴸᴸ          │ None           │ ᴺᵁᴸᴸ          │    ᴺᵁᴸᴸ │ []             │   ᴺᵁᴸᴸ │ []              │
+│ 42            │ Int64          │ ᴺᵁᴸᴸ          │      42 │ []             │   ᴺᵁᴸᴸ │ []              │
+│ Hello, World! │ String         │ Hello, World! │    ᴺᵁᴸᴸ │ []             │   ᴺᵁᴸᴸ │ []              │
+│ [1,2,3]       │ Array(Int64)   │ ᴺᵁᴸᴸ          │    ᴺᵁᴸᴸ │ [1,2,3]        │   ᴺᵁᴸᴸ │ []              │
+└───────────────┴────────────────┴───────────────┴─────────┴────────────────┴────────┴─────────────────┘
+```
+
+```sql
+SELECT toTypeName(d.String), toTypeName(d.Int64), toTypeName(d.`Array(Int64)`), toTypeName(d.Date), toTypeName(d.`Array(String)`) FROM test LIMIT 1;
+```
+
+```text
+┌─toTypeName(d.String)─┬─toTypeName(d.Int64)─┬─toTypeName(d.Array(Int64))─┬─toTypeName(d.Date)─┬─toTypeName(d.Array(String))─┐
+│ Nullable(String)     │ Nullable(Int64)     │ Array(Int64)               │ Nullable(Date)     │ Array(String)               │
+└──────────────────────┴─────────────────────┴────────────────────────────┴────────────────────┴─────────────────────────────┘
+```
+
+```sql
+SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;
+```
+
+```text
+┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐
+│ ᴺᵁᴸᴸ          │ None           │ ᴺᵁᴸᴸ                        │                       ᴺᵁᴸᴸ │ []                                │                      ᴺᵁᴸᴸ │ []                                 │
+│ 42            │ Int64          │ ᴺᵁᴸᴸ                        │                         42 │ []                                │                      ᴺᵁᴸᴸ │ []                                 │
+│ Hello, World! │ String         │ Hello, World!               │                       ᴺᵁᴸᴸ │ []                                │                      ᴺᵁᴸᴸ │ []                                 │
+│ [1,2,3]       │ Array(Int64)   │ ᴺᵁᴸᴸ                        │                       ᴺᵁᴸᴸ │ [1,2,3]                           │                      ᴺᵁᴸᴸ │ []                                 │
+└───────────────┴────────────────┴─────────────────────────────┴────────────────────────────┴───────────────────────────────────┴───────────────────────────┴────────────────────────────────────┘
+```
+
+To find out which type is stored in each row, the function `dynamicType(dynamic_column)` can be used. It returns a `String` with the value type name for each row (or `'None'` if the row is `NULL`).
+
+Example:
+
+```sql
+CREATE TABLE test (d Dynamic) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
+SELECT dynamicType(d) FROM test;
+```
+
+```text
+┌─dynamicType(d)─┐
+│ None           │
+│ Int64          │
+│ String         │
+│ Array(Int64)   │
+└────────────────┘
+```
+
+## Conversion between Dynamic column and other columns
+
+There are 4 possible conversions that can be performed with a `Dynamic` column.
+
+### Converting an ordinary column to a Dynamic column
+
+```sql
+SELECT 'Hello, World!'::Dynamic as d, dynamicType(d);
+```
+
+```text
+┌─d─────────────┬─dynamicType(d)─┐
+│ Hello, World! │ String         │
+└───────────────┴────────────────┘
+```
+
diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp
index 7b268b80116..29773492dc9 100644
--- a/src/Columns/ColumnArray.cpp
+++ b/src/Columns/ColumnArray.cpp
@@ -1289,4 +1289,14 @@ size_t ColumnArray::getNumberOfDimensions() const
     return 1 + nested_array->getNumberOfDimensions(); /// Every modern C++ compiler optimizes tail recursion.
 }
 
+void ColumnArray::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns)
+{
+    Columns nested_source_columns;
+    nested_source_columns.reserve(source_columns.size());
+    for (const auto & source_column : source_columns)
+        nested_source_columns.push_back(assert_cast<const ColumnArray &>(*source_column).getDataPtr());
+
+    data->takeDynamicStructureFromSourceColumns(nested_source_columns);
+}
+
 }
diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h
index 230d8830265..53eb5166df8 100644
--- a/src/Columns/ColumnArray.h
+++ b/src/Columns/ColumnArray.h
@@ -175,6 +175,9 @@ public:
 
     size_t getNumberOfDimensions() const;
 
+    bool hasDynamicStructure() const override { return getData().hasDynamicStructure(); }
+    void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override;
+
 private:
     WrappedPtr data;
     WrappedPtr offsets;
diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h
index 6763410b46d..934adf07cf4 100644
--- a/src/Columns/ColumnCompressed.h
+++ b/src/Columns/ColumnCompressed.h
@@ -122,6 +122,9 @@ public:
     UInt64 getNumberOfDefaultRows() const override { throwMustBeDecompressed(); }
     void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); }
 
+    bool hasDynamicStructure() const override { throwMustBeDecompressed(); }
+    void takeDynamicStructureFromSourceColumns(const Columns &) override { throwMustBeDecompressed(); }
+
 protected:
     size_t rows;
     size_t bytes;
diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp
index f2cea83db0e..cf3f448516c 100644
--- a/src/Columns/ColumnConst.cpp
+++ b/src/Columns/ColumnConst.cpp
@@ -159,6 +159,15 @@ void ColumnConst::compareColumn(
     std::fill(compare_results.begin(), compare_results.end(), res);
 }
 
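+/// For a Const column the dynamic structure is defined by the wrapped data column,
+/// so unwrap each source Const and delegate to the nested data column.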
+void ColumnConst::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns)
+{
+    Columns nested_source_columns;
+    nested_source_columns.reserve(source_columns.size());
+    for (const auto & source_column : source_columns)
+        nested_source_columns.push_back(assert_cast<const ColumnConst &>(*source_column).getDataColumnPtr());
+    data->takeDynamicStructureFromSourceColumns(nested_source_columns);
+}
+
 ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value)
 {
     auto data = column->cloneEmpty();
diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h
index 4a3d40ca0d2..042468cbbcc 100644
--- a/src/Columns/ColumnConst.h
+++ b/src/Columns/ColumnConst.h
@@ -306,6 +306,10 @@ public:
     T getValue() const { return static_cast<T>(getField().safeGet<T>()); }
 
     bool isCollationSupported() const override { return data->isCollationSupported(); }
+
+    bool hasDynamicStructure() const override { return data->hasDynamicStructure(); }
+
+    void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override;
 };
 
 ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value);
diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp
new file mode 100644
index 00000000000..293055b43fc
--- /dev/null
+++ b/src/Columns/ColumnDynamic.cpp
@@ -0,0 +1,785 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+    extern const int PARAMETER_OUT_OF_BOUND;
+}
+
+
+ColumnDynamic::ColumnDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_)
+{
+    /// Create empty Variant.
+    variant_info.variant_type = std::make_shared<DataTypeVariant>(DataTypes{});
+    variant_info.variant_name = variant_info.variant_type->getName();
+    variant_column = variant_info.variant_type->createColumn();
+}
+
+ColumnDynamic::ColumnDynamic(
+    MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_)
+    : variant_column(std::move(variant_column_))
+    , variant_info(variant_info_)
+    , max_dynamic_types(max_dynamic_types_)
+    , statistics(statistics_)
+{
+}
+
+ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_)
+{
+    VariantInfo variant_info;
+    variant_info.variant_type = variant_type;
+    variant_info.variant_name = variant_type->getName();
+    const auto & variants = assert_cast<const DataTypeVariant &>(*variant_type).getVariants();
+    variant_info.variant_names.reserve(variants.size());
+    variant_info.variant_name_to_discriminator.reserve(variants.size());
+    for (ColumnVariant::Discriminator discr = 0; discr != variants.size(); ++discr)
+    {
+        variant_info.variant_names.push_back(variants[discr]->getName());
+        variant_info.variant_name_to_discriminator[variant_info.variant_names.back()] = discr;
+    }
+
+    return create(std::move(variant_column), variant_info, max_dynamic_types_, statistics_);
+}
+
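+/// Try to extend the current Variant type with new_variant.
+/// Returns false when the max_dynamic_types limit prevents adding it;
+/// the caller is then expected to fall back to the String variant.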
+bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant)
+{
+    /// Check if we already have such variant.
+    if (variant_info.variant_name_to_discriminator.contains(new_variant->getName()))
+        return true;
+
+    /// Check if we reached the maximum number of variants.
+    if (variant_info.variant_names.size() >= max_dynamic_types)
+    {
+        /// ColumnDynamic can have max_dynamic_types number of variants only when it has String as a variant.
+        /// Otherwise we won't be able to cast new variants to Strings.
+        if (!variant_info.variant_name_to_discriminator.contains("String"))
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no String variant exists");
+
+        return false;
+    }
+
+    /// If we already have max_dynamic_types - 1 variants and none of them is String, we can add only a String variant.
+    if (variant_info.variant_names.size() == max_dynamic_types - 1 && new_variant->getName() != "String" && !variant_info.variant_name_to_discriminator.contains("String"))
+        return false;
+
+    const DataTypes & current_variants = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants();
+    DataTypes all_variants = current_variants;
+    all_variants.push_back(new_variant);
+    auto new_variant_type = std::make_shared<DataTypeVariant>(all_variants);
+    const auto & new_variants = assert_cast<const DataTypeVariant &>(*new_variant_type).getVariants();
+
+    std::vector<ColumnVariant::Discriminator> current_to_new_discriminators;
+    current_to_new_discriminators.resize(variant_info.variant_names.size());
+    Names new_variant_names;
+    new_variant_names.reserve(new_variants.size());
+    std::unordered_map<String, ColumnVariant::Discriminator> new_variant_name_to_discriminator;
+    new_variant_name_to_discriminator.reserve(new_variants.size());
+    std::vector<std::pair<MutableColumnPtr, ColumnVariant::Discriminator>> new_variant_columns_and_discriminators_to_add;
+    new_variant_columns_and_discriminators_to_add.reserve(new_variants.size() - current_variants.size());
+
+    for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr)
+    {
+        String name = new_variants[discr]->getName();
+        new_variant_names.push_back(name);
+        new_variant_name_to_discriminator[name] = discr;
+        auto it = variant_info.variant_name_to_discriminator.find(name);
+        if (it == variant_info.variant_name_to_discriminator.end())
+            new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), discr);
+        else
+            current_to_new_discriminators[it->second] = discr;
+    }
+
+    variant_info.variant_type = new_variant_type;
+    variant_info.variant_name = new_variant_type->getName();
+    variant_info.variant_names = new_variant_names;
+    variant_info.variant_name_to_discriminator = new_variant_name_to_discriminator;
+    assert_cast<ColumnVariant &>(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add));
+    variant_mappings_cache.clear();
+    return true;
+}
+
+void ColumnDynamic::addStringVariant()
+{
+    addNewVariant(std::make_shared<DataTypeString>());
+}
+
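+/// Rebuild variant_info for new_variant_type and expand the underlying ColumnVariant
+/// with empty columns for the newly added variants, remapping existing global discriminators.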
new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), discr); + else + current_to_new_discriminators[current_it->second] = discr; + } + + variant_info.variant_type = new_variant_type; + variant_info.variant_name = new_variant_type->getName(); + variant_info.variant_names = new_variant_names; + variant_info.variant_name_to_discriminator = new_variant_name_to_discriminator; + assert_cast(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); + /// Clear mappings cache because now with new Variant we will have new mappings. + variant_mappings_cache.clear(); +} + +std::vector * ColumnDynamic::combineVariants(const DB::ColumnDynamic::VariantInfo & other_variant_info) +{ + /// Check if we already have global discriminators mapping for other Variant in cache. + /// It's used to not calculate the same mapping each call of insertFrom with the same columns. + auto cache_it = variant_mappings_cache.find(other_variant_info.variant_name); + if (cache_it != variant_mappings_cache.end()) + return &cache_it->second; + + /// Check if we already tried to combine these variants but failed due to max_dynamic_types limit. + if (variants_with_failed_combination.contains(other_variant_info.variant_name)) + return nullptr; + + const DataTypes & other_variants = assert_cast(*other_variant_info.variant_type).getVariants(); + + size_t num_new_variants = 0; + for (size_t i = 0; i != other_variants.size(); ++i) + { + if (!variant_info.variant_name_to_discriminator.contains(other_variant_info.variant_names[i])) + ++num_new_variants; + } + + /// If we have new variants we need to update current variant info and extend Variant column + if (num_new_variants) + { + const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); + + /// We cannot combine Variants if total number of variants exceeds max_dynamic_types. + if (current_variants.size() + num_new_variants > max_dynamic_types) + { + /// Remember that we cannot combine our variant with this one, so we will not try to do it again. + variants_with_failed_combination.insert(other_variant_info.variant_name); + return nullptr; + } + + /// We cannot combine Variants if total number of variants reaches max_dynamic_types and we don't have String variant. + if (current_variants.size() + num_new_variants == max_dynamic_types && !variant_info.variant_name_to_discriminator.contains("String") && !other_variant_info.variant_name_to_discriminator.contains("String")) + { + variants_with_failed_combination.insert(other_variant_info.variant_name); + return nullptr; + } + + DataTypes all_variants = current_variants; + all_variants.insert(all_variants.end(), other_variants.begin(), other_variants.end()); + auto new_variant_type = std::make_shared(all_variants); + updateVariantInfoAndExpandVariantColumn(new_variant_type); + } + + /// Create a global discriminators mapping for other variant. + std::vector other_to_new_discriminators; + other_to_new_discriminators.reserve(other_variants.size()); + for (size_t i = 0; i != other_variants.size(); ++i) + other_to_new_discriminators.push_back(variant_info.variant_name_to_discriminator[other_variant_info.variant_names[i]]); + + /// Save mapping to cache to not calculate it again for the same Variants. 
+ auto [it, _] = variant_mappings_cache.emplace(other_variant_info.variant_name, std::move(other_to_new_discriminators)); + return &it->second; +} + +void ColumnDynamic::insert(const DB::Field & x) +{ + /// Check if we can insert field without Variant extension. + if (variant_column->tryInsert(x)) + return; + + /// If we cannot insert field into current variant column, extend it with new variant for this field from its type. + if (likely(addNewVariant(applyVisitor(FieldToDataType(), x)))) + { + /// Now we should be able to insert this field into extended variant column. + variant_column->insert(x); + } + else + { + /// We reached maximum number of variants and couldn't add new variant. + /// This case should be really rare in real use cases. + /// We should always be able to add String variant and cast inserted value to String. + addStringVariant(); + variant_column->insert(toString(x)); + } +} + +bool ColumnDynamic::tryInsert(const DB::Field & x) +{ + /// We can insert any value into Dynamic column. + insert(x); + return true; +} + + +void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +{ + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. + if (variant_info.variant_name == dynamic_src.variant_info.variant_name) + { + variant_column->insertFrom(*dynamic_src.variant_column, n); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. + if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertFrom(*dynamic_src.variant_column, n, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// We need to insert single value, try to add only corresponding variant. + const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(n); + + /// NULL doesn't require Variant extension. + if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + insertDefault(); + return; + } + + auto variant_type = assert_cast(*dynamic_src.variant_info.variant_type).getVariants()[src_global_discr]; + if (addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator[dynamic_src.variant_info.variant_names[src_global_discr]]; + variant_col.insertIntoVariantFrom(discr, src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(n)); + return; + } + + /// We reached maximum number of variants and couldn't add new variant. + /// We should always be able to add String variant and cast inserted value to String. 
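+    /// Illustrative example (hypothetical state): with max_dynamic_types = 3 and variants
+    /// (Int64, Array(Int64), String), inserting a row of a fourth type, e.g. Date, from the
+    /// source column stores its String representation in the String variant instead.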
+ addStringVariant(); + auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); + tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(n)); + auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; + variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); +} + +void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +{ + if (start + length > src_.size()) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. " + "[start({}) + length({}) > src.size()({})]", start, length, src_.size()); + + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. + if (variant_info.variant_names == dynamic_src.variant_info.variant_names) + { + variant_column->insertRangeFrom(*dynamic_src.variant_column, start, length); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. + if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// In this case we will add most frequent variants from this range and insert them as usual, + /// all other variants will be converted to String. + const auto & src_variant_column = dynamic_src.getVariantColumn(); + + /// Calculate ranges for each variant in current range. + std::vector> variants_ranges(dynamic_src.variant_info.variant_names.size(), {0, 0}); + /// If we insert the whole column, no need to iterate through the range, we can just take variant sizes. + if (start == 0 && length == dynamic_src.size()) + { + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + variants_ranges[i] = {0, src_variant_column.getVariantByGlobalDiscriminator(i).size()}; + } + /// Otherwise we need to iterate through discriminators and calculate the range for each variant. + else + { + const auto & local_discriminators = src_variant_column.getLocalDiscriminators(); + const auto & offsets = src_variant_column.getOffsets(); + size_t end = start + length; + for (size_t i = start; i != end; ++i) + { + auto discr = src_variant_column.globalDiscriminatorByLocal(local_discriminators[i]); + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + { + if (!variants_ranges[discr].second) + variants_ranges[discr].first = offsets[i]; + ++variants_ranges[discr].second; + } + } + } + + const auto & src_variants = assert_cast(*dynamic_src.variant_info.variant_type).getVariants(); + /// List of variants that will be converted to String. + std::vector variants_to_convert_to_string; + /// Mapping from global discriminators of src_variant to the new variant we will create. + std::vector other_to_new_discriminators; + other_to_new_discriminators.reserve(dynamic_src.variant_info.variant_names.size()); + + /// Check if we cannot add any more new variants. In this case we will convert all new variants to String. 
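+    /// Illustrative example (hypothetical state): if this column already holds max_dynamic_types
+    /// variants including String, every source variant that is missing here is mapped to the
+    /// String discriminator, and its values are cast to String below.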
+ if (variant_info.variant_names.size() == max_dynamic_types || (variant_info.variant_names.size() == max_dynamic_types - 1 && !variant_info.variant_name_to_discriminator.contains("String"))) + { + addStringVariant(); + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + { + auto it = variant_info.variant_name_to_discriminator.find(dynamic_src.variant_info.variant_names[i]); + if (it == variant_info.variant_name_to_discriminator.end()) + { + variants_to_convert_to_string.push_back(i); + other_to_new_discriminators.push_back(variant_info.variant_name_to_discriminator["String"]); + } + else + { + other_to_new_discriminators.push_back(it->second); + } + } + } + /// We still can add some new variants, but not all of them. Let's choose the most frequent variants in specified range. + else + { + std::vector> new_variants_with_sizes; + new_variants_with_sizes.reserve(dynamic_src.variant_info.variant_names.size()); + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + { + const auto & variant_name = dynamic_src.variant_info.variant_names[i]; + if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name)) + new_variants_with_sizes.emplace_back(variants_ranges[i].second, i); + } + + std::sort(new_variants_with_sizes.begin(), new_variants_with_sizes.end(), std::greater()); + DataTypes new_variants = assert_cast(*variant_info.variant_type).getVariants(); + if (!variant_info.variant_name_to_discriminator.contains("String")) + new_variants.push_back(std::make_shared()); + + for (const auto & [_, discr] : new_variants_with_sizes) + { + if (new_variants.size() != max_dynamic_types) + new_variants.push_back(src_variants[discr]); + else + variants_to_convert_to_string.push_back(discr); + } + + auto new_variant_type = std::make_shared(new_variants); + updateVariantInfoAndExpandVariantColumn(new_variant_type); + auto string_variant_discriminator = variant_info.variant_name_to_discriminator.at("String"); + for (const auto & variant_name : dynamic_src.variant_info.variant_names) + { + auto it = variant_info.variant_name_to_discriminator.find(variant_name); + if (it == variant_info.variant_name_to_discriminator.end()) + other_to_new_discriminators.push_back(string_variant_discriminator); + else + other_to_new_discriminators.push_back(it->second); + } + } + + /// Convert to String all variants that couldn't be added. 
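+    /// E.g. a rejected source variant whose values occupy rows [5, 15) of its nested column is
+    /// cast to a temporary String column once; individual rows are then copied from it by offset.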
+ std::unordered_map variants_converted_to_string; + variants_converted_to_string.reserve(variants_to_convert_to_string.size()); + for (auto discr : variants_to_convert_to_string) + { + auto [variant_start, variant_length] = variants_ranges[discr]; + const auto & variant = src_variant_column.getVariantPtrByGlobalDiscriminator(discr); + if (variant_start == 0 && variant_length == variant->size()) + variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant, src_variants[discr], ""), std::make_shared()); + else + variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant->cut(variant_start, variant_length), src_variants[discr], ""), std::make_shared()); + } + + const auto & src_local_discriminators = src_variant_column.getLocalDiscriminators(); + const auto & src_offsets = src_variant_column.getOffsets(); + const auto & src_variant_columns = src_variant_column.getVariants(); + size_t end = start + length; + for (size_t i = start; i != end; ++i) + { + auto local_discr = src_local_discriminators[i]; + if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + variant_col.insertDefault(); + } + else + { + auto global_discr = src_variant_column.globalDiscriminatorByLocal(local_discr); + auto to_global_discr = other_to_new_discriminators[global_discr]; + auto it = variants_converted_to_string.find(global_discr); + if (it == variants_converted_to_string.end()) + { + variant_col.insertIntoVariantFrom(to_global_discr, *src_variant_columns[local_discr], src_offsets[i]); + } + else + { + variant_col.insertIntoVariantFrom(to_global_discr, *it->second, src_offsets[i] - variants_ranges[global_discr].first); + } + } + } +} + +void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +{ + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. + if (variant_info.variant_names == dynamic_src.variant_info.variant_names) + { + variant_column->insertManyFrom(*dynamic_src.variant_column, position, length); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. + if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertManyFrom(*dynamic_src.variant_column, position, length, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// We need to insert single value, try to add only corresponding variant. 
+ const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(position); + if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + insertDefault(); + return; + } + + auto variant_type = assert_cast(*dynamic_src.variant_info.variant_type).getVariants()[src_global_discr]; + if (addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator[dynamic_src.variant_info.variant_names[src_global_discr]]; + variant_col.insertManyIntoVariantFrom(discr, src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(position), length); + return; + } + + addStringVariant(); + auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); + tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(position)); + auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; + variant_col.insertManyIntoVariantFrom(string_variant_discr, *tmp_string_column, 0, length); +} + + +StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, const char *& begin) const +{ + /// We cannot use Variant serialization here as it serializes discriminator + value, + /// but Dynamic doesn't have fixed mapping discriminator <-> variant type + /// as different Dynamic column can have different Variants. + /// Instead, we serialize null bit + variant type name (size + bytes) + value. + const auto & variant_col = assert_cast(*variant_column); + auto discr = variant_col.globalDiscriminatorAt(n); + StringRef res; + UInt8 null_bit = discr == ColumnVariant::NULL_DISCRIMINATOR; + if (null_bit) + { + char * pos = arena.allocContinue(sizeof(UInt8), begin); + memcpy(pos, &null_bit, sizeof(UInt8)); + res.data = pos; + res.size = sizeof(UInt8); + return res; + } + + const auto & variant_name = variant_info.variant_names[discr]; + size_t variant_name_size = variant_name.size(); + char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_name.size(), begin); + memcpy(pos, &null_bit, sizeof(UInt8)); + memcpy(pos + sizeof(UInt8), &variant_name_size, sizeof(size_t)); + memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_name.data(), variant_name.size()); + res.data = pos; + res.size = sizeof(UInt8) + sizeof(size_t) + variant_name.size(); + + auto value_ref = variant_col.getVariantByGlobalDiscriminator(discr).serializeValueIntoArena(variant_col.offsetAt(n), arena, begin); + res.data = value_ref.data - res.size; + res.size += value_ref.size; + return res; +} + +const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) +{ + auto & variant_col = assert_cast(*variant_column); + UInt8 null_bit = unalignedLoad(pos); + pos += sizeof(UInt8); + if (null_bit) + { + insertDefault(); + return pos; + } + + /// Read variant type name. + const size_t variant_name_size = unalignedLoad(pos); + pos += sizeof(variant_name_size); + String variant_name; + variant_name.resize(variant_name_size); + memcpy(variant_name.data(), pos, variant_name_size); + pos += variant_name_size; + /// If we already have such variant, just deserialize it into corresponding variant column. 
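+    /// E.g. for an arena value laid out as [null_bit=0][name_size=5]["Int64"][8-byte Int64],
+    /// "Int64" is looked up below and the remaining bytes are deserialized by that variant.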
+ auto it = variant_info.variant_name_to_discriminator.find(variant_name); + if (it != variant_info.variant_name_to_discriminator.end()) + { + auto discr = it->second; + return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + } + + /// If we don't have such variant, add it. + auto variant_type = DataTypeFactory::instance().get(variant_name); + if (likely(addNewVariant(variant_type))) + { + auto discr = variant_info.variant_name_to_discriminator[variant_name]; + return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + } + + /// We reached maximum number of variants and couldn't add new variant. + /// This case should be really rare in real use cases. + /// We should always be able to add String variant and cast inserted value to String. + addStringVariant(); + /// Create temporary column of this variant type and deserialize value into it. + auto tmp_variant_column = variant_type->createColumn(); + pos = tmp_variant_column->deserializeAndInsertFromArena(pos); + /// Cast temporary column to String and insert this value into String variant. + auto str_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + variant_col.insertIntoVariantFrom(variant_info.variant_name_to_discriminator["String"], *str_column, 0); + return pos; +} + +const char * ColumnDynamic::skipSerializedInArena(const char * pos) const +{ + UInt8 null_bit = unalignedLoad(pos); + pos += sizeof(UInt8); + if (null_bit) + return pos; + + const size_t variant_name_size = unalignedLoad(pos); + pos += sizeof(variant_name_size); + String variant_name; + variant_name.resize(variant_name_size); + memcpy(variant_name.data(), pos, variant_name_size); + pos += variant_name_size; + auto tmp_variant_column = DataTypeFactory::instance().get(variant_name)->createColumn(); + return tmp_variant_column->skipSerializedInArena(pos); +} + +void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const +{ + const auto & variant_col = assert_cast(*variant_column); + auto discr = variant_col.globalDiscriminatorAt(n); + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + { + hash.update(discr); + return; + } + + hash.update(variant_info.variant_names[discr]); + variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); +} + +int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +{ + const auto & left_variant = assert_cast(*variant_column); + const auto & right_dynamic = assert_cast(rhs); + const auto & right_variant = assert_cast(*right_dynamic.variant_column); + + auto left_discr = left_variant.globalDiscriminatorAt(n); + auto right_discr = right_variant.globalDiscriminatorAt(m); + + /// Check if we have NULLs and return result based on nan_direction_hint. + if (left_discr == ColumnVariant::NULL_DISCRIMINATOR && right_discr == ColumnVariant::NULL_DISCRIMINATOR) + return 0; + else if (left_discr == ColumnVariant::NULL_DISCRIMINATOR) + return nan_direction_hint; + else if (right_discr == ColumnVariant::NULL_DISCRIMINATOR) + return -nan_direction_hint; + + /// If rows have different types, we compare type names. + if (variant_info.variant_names[left_discr] != right_dynamic.variant_info.variant_names[right_discr]) + return variant_info.variant_names[left_discr] < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + + /// If rows have the same types, compare actual values from corresponding variants. 
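+    /// E.g. a Date row and an Int64 row compare as "Date" < "Int64" (by type name),
+    /// while two Int64 rows are compared by their actual values.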
+    return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint);
+}
+
+ColumnPtr ColumnDynamic::compress() const
+{
+    ColumnPtr variant_compressed = variant_column->compress();
+    size_t byte_size = variant_compressed->byteSize();
+    return ColumnCompressed::create(size(), byte_size,
+        [my_variant_compressed = std::move(variant_compressed), my_variant_info = variant_info, my_max_dynamic_types = max_dynamic_types, my_statistics = statistics]() mutable
+        {
+            return ColumnDynamic::create(my_variant_compressed->decompress(), my_variant_info, my_max_dynamic_types, my_statistics);
+        });
+}
+
+void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns)
+{
+    if (!empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "takeDynamicStructureFromSourceColumns should be called only on an empty Dynamic column");
+
+    /// During serialization of a Dynamic column in MergeTree, all Dynamic columns
+    /// in a single part must have the same structure (the same variants). During merge
+    /// the resulting column is constructed by inserting from source columns,
+    /// but it may happen that the resulting column doesn't have rows from all source parts
+    /// but only from a subset of them; as a result some variants could be missing
+    /// and the structures of the resulting columns may differ.
+    /// To solve this problem, before merge we create an empty resulting column and use this method
+    /// to take the dynamic structure from all source columns even if we won't insert
+    /// rows from some of them.
+
+    /// We want to construct the resulting variant with the most frequent variants from the source columns and convert the rarest
+    /// variants to a single String variant if we exceed the limit of variants.
+    /// First, collect all variants from all source columns and calculate total sizes.
+    std::unordered_map<String, size_t> total_sizes;
+    DataTypes all_variants;
+
+    for (const auto & source_column : source_columns)
+    {
+        const auto & source_dynamic = assert_cast<const ColumnDynamic &>(*source_column);
+        const auto & source_variant_column = source_dynamic.getVariantColumn();
+        const auto & source_variant_info = source_dynamic.getVariantInfo();
+        const auto & source_variants = assert_cast<const DataTypeVariant &>(*source_variant_info.variant_type).getVariants();
+        /// During deserialization from MergeTree we will have variant sizes statistics from the whole data part.
+        const auto & source_statistics = source_dynamic.getStatistics();
+        for (size_t i = 0; i != source_variants.size(); ++i)
+        {
+            const auto & variant_name = source_variant_info.variant_names[i];
+            auto it = total_sizes.find(variant_name);
+            /// Add this variant to the list of all variants if we didn't see it yet.
+            if (it == total_sizes.end())
+            {
+                all_variants.push_back(source_variants[i]);
+                it = total_sizes.emplace(variant_name, 0).first;
+            }
+
+            size_t size = source_statistics.data.empty() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : source_statistics.data.at(variant_name);
+            it->second += size;
+        }
+    }
+
+    DataTypePtr result_variant_type;
+    /// Check if the number of all variants exceeds the limit.
+    if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_sizes.contains("String")))
+    {
+        /// Create a list of variants with their sizes and sort it by size in descending order.
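+        /// Illustrative example (hypothetical sizes): with max_dynamic_types = 3 and
+        /// total_sizes {Int64: 1000, Float64: 500, Date: 1}, the result keeps String (reserved
+        /// in advance) plus the two most frequent variants Int64 and Float64; Date rows are
+        /// converted to String when the merged parts are inserted.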
+        std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
+        variants_with_sizes.reserve(all_variants.size());
+        for (const auto & variant : all_variants)
+            variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant);
+        std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
+
+        /// Take the first max_dynamic_types variants from the sorted list.
+        DataTypes result_variants;
+        result_variants.reserve(max_dynamic_types);
+        /// Add the String variant in advance.
+        result_variants.push_back(std::make_shared<DataTypeString>());
+        size_t i = 0;
+        while (result_variants.size() != max_dynamic_types && i < variants_with_sizes.size())
+        {
+            const auto & variant = variants_with_sizes[i].second;
+            if (variant->getName() != "String")
+                result_variants.push_back(variant);
+            ++i;
+        }
+
+        result_variant_type = std::make_shared<DataTypeVariant>(result_variants);
+    }
+    else
+    {
+        result_variant_type = std::make_shared<DataTypeVariant>(all_variants);
+    }
+
+    /// Now we have the resulting Variant and can fill the variant info.
+    variant_info.variant_type = result_variant_type;
+    variant_info.variant_name = result_variant_type->getName();
+    const auto & result_variants = assert_cast<const DataTypeVariant &>(*result_variant_type).getVariants();
+    variant_info.variant_names.clear();
+    variant_info.variant_names.reserve(result_variants.size());
+    variant_info.variant_name_to_discriminator.clear();
+    variant_info.variant_name_to_discriminator.reserve(result_variants.size());
+    statistics.data.clear();
+    statistics.data.reserve(result_variants.size());
+    statistics.source = Statistics::Source::MERGE;
+    for (size_t i = 0; i != result_variants.size(); ++i)
+    {
+        auto variant_name = result_variants[i]->getName();
+        variant_info.variant_names.push_back(variant_name);
+        variant_info.variant_name_to_discriminator[variant_name] = i;
+        statistics.data[variant_name] = total_sizes[variant_name];
+    }
+
+    variant_column = variant_info.variant_type->createColumn();
+
+    /// Now we have the resulting Variant that will be used in all merged columns.
+    /// Variants can also contain Dynamic columns inside, so we should collect
+    /// all source variants that will be used in the resulting merged column
+    /// and call takeDynamicStructureFromSourceColumns on all resulting variants.
+    std::vector<Columns> variants_source_columns;
+    variants_source_columns.resize(variant_info.variant_names.size());
+    for (const auto & source_column : source_columns)
+    {
+        const auto & source_dynamic_column = assert_cast<const ColumnDynamic &>(*source_column);
+        const auto & source_variant_info = source_dynamic_column.getVariantInfo();
+        for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
+        {
+            /// Try to find this variant in the current source column.
+            auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]);
+            if (it != source_variant_info.variant_name_to_discriminator.end())
+                variants_source_columns[i].push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second));
+        }
+    }
+
+    auto & variant_col = getVariantColumn();
+    for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
+        variant_col.getVariantByGlobalDiscriminator(i).takeDynamicStructureFromSourceColumns(variants_source_columns[i]);
+}
+
+void ColumnDynamic::applyNullMap(const ColumnVector<UInt8>::Container & null_map)
+{
+    assert_cast<ColumnVariant &>(*variant_column).applyNullMap(null_map);
+}
+
+void ColumnDynamic::applyNegatedNullMap(const ColumnVector<UInt8>::Container & null_map)
+{
+    assert_cast<ColumnVariant &>(*variant_column).applyNegatedNullMap(null_map);
+}
+
+}
diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h
new file mode 100644
index 00000000000..7487a5aa0db
--- /dev/null
+++ b/src/Columns/ColumnDynamic.h
@@ -0,0 +1,363 @@
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+/**
+ * Column for storing Dynamic type values.
+ * A Dynamic column allows inserting and storing values of any data type inside it.
+ * Internally it stores:
+ *  - a Variant column with all inserted values of different types.
+ *  - information about the currently stored variants.
+ *
+ * When new values are inserted into a Dynamic column, the internal Variant
+ * type and column are extended if the inserted value has a new type.
+ */
+class ColumnDynamic final : public COWHelper<IColumnHelper<ColumnDynamic>, ColumnDynamic>
+{
+public:
+    struct Statistics
+    {
+        enum class Source
+        {
+            READ,
+            MERGE,
+        };
+
+        Source source;
+        std::unordered_map<String, size_t> data;
+    };
+
+private:
+    friend class COWHelper<IColumnHelper<ColumnDynamic>, ColumnDynamic>;
+
+    struct VariantInfo
+    {
+        DataTypePtr variant_type;
+        /// Name of the whole variant to not call getName() every time.
+        String variant_name;
+        /// Store names of variants to not call getName() every time on variants.
+        Names variant_names;
+        /// Store mapping (variant name) -> (global discriminator).
+        /// It's used during variant extension.
+        std::unordered_map<String, ColumnVariant::Discriminator> variant_name_to_discriminator;
+    };
+
+    explicit ColumnDynamic(size_t max_dynamic_types_);
+    ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {});
+
+public:
+    /** Create an immutable column using immutable arguments. These arguments may be shared with other columns.
+      * Use IColumn::mutate in order to make a mutable column and mutate shared nested columns.
+ */ + using Base = COWHelper, ColumnDynamic>; + static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, statistics_); + } + + static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, statistics_); + } + + static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {}); + + static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, statistics_); + } + + static MutablePtr create(size_t max_dynamic_types_) + { + return Base::create(max_dynamic_types_); + } + + std::string getName() const override { return "Dynamic(max_types=" + std::to_string(max_dynamic_types) + ")"; } + + const char * getFamilyName() const override + { + return "Dynamic"; + } + + TypeIndex getDataType() const override + { + return TypeIndex::Dynamic; + } + + MutableColumnPtr cloneEmpty() const override + { + /// Keep current dynamic structure. + return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics); + } + + MutableColumnPtr cloneResized(size_t size) const override + { + return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics); + } + + size_t size() const override + { + return variant_column->size(); + } + + Field operator[](size_t n) const override + { + return (*variant_column)[n]; + } + + void get(size_t n, Field & res) const override + { + variant_column->get(n, res); + } + + bool isDefaultAt(size_t n) const override + { + return variant_column->isDefaultAt(n); + } + + bool isNullAt(size_t n) const override + { + return variant_column->isNullAt(n); + } + + StringRef getDataAt(size_t n) const override + { + return variant_column->getDataAt(n); + } + + void insertData(const char * pos, size_t length) override + { + return variant_column->insertData(pos, length); + } + + void insert(const Field & x) override; + bool tryInsert(const Field & x) override; + void insertFrom(const IColumn & src_, size_t n) override; + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + + void insertDefault() override + { + variant_column->insertDefault(); + } + + void insertManyDefaults(size_t length) override + { + variant_column->insertManyDefaults(length); + } + + void popBack(size_t n) override + { + variant_column->popBack(n); + } + + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + const char * deserializeAndInsertFromArena(const char * pos) override; + const char * skipSerializedInArena(const char * pos) const override; + + void updateHashWithValue(size_t n, SipHash & hash) const override; + + void updateWeakHash32(WeakHash32 & hash) const override + { + variant_column->updateWeakHash32(hash); + } + + void updateHashFast(SipHash & hash) const override + { + variant_column->updateHashFast(hash); + } + + ColumnPtr filter(const Filter & filt, 
ssize_t result_size_hint) const override + { + return create(variant_column->filter(filt, result_size_hint), variant_info, max_dynamic_types); + } + + void expand(const Filter & mask, bool inverted) override + { + variant_column->expand(mask, inverted); + } + + ColumnPtr permute(const Permutation & perm, size_t limit) const override + { + return create(variant_column->permute(perm, limit), variant_info, max_dynamic_types); + } + + ColumnPtr index(const IColumn & indexes, size_t limit) const override + { + return create(variant_column->index(indexes, limit), variant_info, max_dynamic_types); + } + + ColumnPtr replicate(const Offsets & replicate_offsets) const override + { + return create(variant_column->replicate(replicate_offsets), variant_info, max_dynamic_types); + } + + MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override + { + MutableColumns scattered_variant_columns = variant_column->scatter(num_columns, selector); + MutableColumns scattered_columns; + scattered_columns.reserve(num_columns); + for (auto & scattered_variant_column : scattered_variant_columns) + scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types)); + + return scattered_columns; + } + + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + + bool hasEqualValues() const override + { + return variant_column->hasEqualValues(); + } + + void getExtremes(Field & min, Field & max) const override + { + variant_column->getExtremes(min, max); + } + + void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override + { + variant_column->getPermutation(direction, stability, limit, nan_direction_hint, res); + } + + void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override + { + variant_column->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges); + } + + void reserve(size_t n) override + { + variant_column->reserve(n); + } + + void ensureOwnership() override + { + variant_column->ensureOwnership(); + } + + size_t byteSize() const override + { + return variant_column->byteSize(); + } + + size_t byteSizeAt(size_t n) const override + { + return variant_column->byteSizeAt(n); + } + + size_t allocatedBytes() const override + { + return variant_column->allocatedBytes(); + } + + void protect() override + { + variant_column->protect(); + } + + void forEachSubcolumn(MutableColumnCallback callback) override + { + callback(variant_column); + } + + void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override + { + callback(*variant_column); + variant_column->forEachSubcolumnRecursively(callback); + } + + bool structureEquals(const IColumn & rhs) const override + { + if (const auto * rhs_concrete = typeid_cast(&rhs)) + return max_dynamic_types == rhs_concrete->max_dynamic_types; + return false; + } + + ColumnPtr compress() const override; + + double getRatioOfDefaultRows(double sample_ratio) const override + { + return variant_column->getRatioOfDefaultRows(sample_ratio); + } + + UInt64 getNumberOfDefaultRows() const override + { + return variant_column->getNumberOfDefaultRows(); + } + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const 
override
+    {
+        variant_column->getIndicesOfNonDefaultRows(indices, from, limit);
+    }
+
+    void finalize() override
+    {
+        variant_column->finalize();
+    }
+
+    bool isFinalized() const override
+    {
+        return variant_column->isFinalized();
+    }
+
+    /// Apply null map to the nested Variant column.
+    void applyNullMap(const ColumnVector<UInt8>::Container & null_map);
+    void applyNegatedNullMap(const ColumnVector<UInt8>::Container & null_map);
+
+    const VariantInfo & getVariantInfo() const { return variant_info; }
+
+    const ColumnPtr & getVariantColumnPtr() const { return variant_column; }
+    ColumnPtr & getVariantColumnPtr() { return variant_column; }
+
+    const ColumnVariant & getVariantColumn() const { return assert_cast<const ColumnVariant &>(*variant_column); }
+    ColumnVariant & getVariantColumn() { return assert_cast<ColumnVariant &>(*variant_column); }
+
+    bool addNewVariant(const DataTypePtr & new_variant);
+    void addStringVariant();
+
+    bool hasDynamicStructure() const override { return true; }
+    void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override;
+
+    const Statistics & getStatistics() const { return statistics; }
+
+    size_t getMaxDynamicTypes() const { return max_dynamic_types; }
+
+private:
+    /// Combine the current variant with the other variant and return the global discriminators mapping
+    /// from the other variant to the combined one. It's used for inserting from
+    /// different variants.
+    /// Returns nullptr if the maximum number of Variants is reached and the new Variant cannot be created.
+    std::vector<ColumnVariant::Discriminator> * combineVariants(const VariantInfo & other_variant_info);
+
+    void updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type);
+
+    WrappedPtr variant_column;
+    /// Store the type of the current variant with some additional information.
+    VariantInfo variant_info;
+    /// Maximum number of different types that can be stored in Dynamic.
+    /// If exceeded, all new variants will be converted to String.
+    size_t max_dynamic_types;
+
+    /// Size statistics of each variant from a MergeTree data part.
+    /// Used in takeDynamicStructureFromSourceColumns and set during deserialization.
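+    /// E.g. after reading a part whose Variant holds 100 Int64 values and 20 String values,
+    /// a column created during deserialization would have:
+    ///     statistics.source == Statistics::Source::READ
+    ///     statistics.data == {{"Int64", 100}, {"String", 20}}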
+ Statistics statistics; + + std::unordered_map> variant_mappings_cache; + std::unordered_set variants_with_failed_combination; +}; + +} diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 57e8ba685b4..48e8bced23a 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -312,4 +312,13 @@ ColumnPtr ColumnMap::compress() const }); } +void ColumnMap::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + } diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 60aa69e7bf6..52165d0d74e 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -104,6 +104,9 @@ public: ColumnTuple & getNestedData() { return assert_cast(getNestedColumn().getData()); } ColumnPtr compress() const override; + + bool hasDynamicStructure() const override { return nested->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; }; } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index fa5fdfb8c21..4474816601e 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -835,6 +835,15 @@ ColumnPtr ColumnNullable::getNestedColumnWithDefaultOnNull() const return res; } +void ColumnNullable::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested_column->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + ColumnPtr makeNullable(const ColumnPtr & column) { if (isColumnNullable(*column)) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index ef4bf4fa41b..73bd75527f8 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -186,6 +186,9 @@ public: /// Check that size of null map equals to size of nested column. 
void checkConsistency() const; + bool hasDynamicStructure() const override { return nested_column->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: WrappedPtr nested_column; WrappedPtr null_map; diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index b9a173fd92c..4acd162e52f 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -801,6 +801,15 @@ ColumnSparse::Iterator ColumnSparse::getIterator(size_t n) const return Iterator(offsets_data, _size, current_offset, n); } +void ColumnSparse::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + Columns values_source_columns; + values_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + values_source_columns.push_back(assert_cast(*source_column).getValuesPtr()); + values->takeDynamicStructureFromSourceColumns(values_source_columns); +} + ColumnPtr recursiveRemoveSparse(const ColumnPtr & column) { if (!column) diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index c1bd614102c..7d3200da35f 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -148,6 +148,9 @@ public: size_t sizeOfValueIfFixed() const override { return values->sizeOfValueIfFixed() + values->sizeOfValueIfFixed(); } bool isCollationSupported() const override { return values->isCollationSupported(); } + bool hasDynamicStructure() const override { return values->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + size_t getNumberOfTrailingDefaults() const { return offsets->empty() ? _size : _size - getOffsetsData().back() - 1; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 062bdadf9d2..4e8e4063157 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -572,6 +572,34 @@ bool ColumnTuple::isCollationSupported() const return false; } +bool ColumnTuple::hasDynamicStructure() const +{ + for (const auto & column : columns) + { + if (column->hasDynamicStructure()) + return true; + } + return false; +} + +void ColumnTuple::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + std::vector nested_source_columns; + nested_source_columns.resize(columns.size()); + for (size_t i = 0; i != columns.size(); ++i) + nested_source_columns[i].reserve(source_columns.size()); + + for (const auto & source_column : source_columns) + { + const auto & nsource_columns = assert_cast(*source_column).getColumns(); + for (size_t i = 0; i != nsource_columns.size(); ++i) + nested_source_columns[i].push_back(nsource_columns[i]); + } + + for (size_t i = 0; i != columns.size(); ++i) + columns[i]->takeDynamicStructureFromSourceColumns(nested_source_columns[i]); +} + ColumnPtr ColumnTuple::compress() const { diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 5b626155754..65103fa8c49 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -114,6 +114,9 @@ public: const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; } ColumnPtr & getColumnPtr(size_t idx) { return columns[idx]; } + bool hasDynamicStructure() const override; + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const; diff --git a/src/Columns/ColumnVariant.cpp 
b/src/Columns/ColumnVariant.cpp index 31e9b0964f4..819491f7fd9 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include @@ -452,16 +451,18 @@ bool ColumnVariant::tryInsert(const DB::Field & x) return false; } -void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +void ColumnVariant::insertFromImpl(const DB::IColumn & src_, size_t n, const std::vector * global_discriminators_mapping) { + const size_t num_variants = variants.size(); const ColumnVariant & src = assert_cast(src_); - const size_t num_variants = variants.size(); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); - /// Remember that src column can have different local variants order. - Discriminator global_discr = src.globalDiscriminatorAt(n); + Discriminator src_global_discr = src.globalDiscriminatorAt(n); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; Discriminator local_discr = localDiscriminatorByGlobal(global_discr); getLocalDiscriminators().push_back(local_discr); if (local_discr == NULL_DISCRIMINATOR) @@ -471,25 +472,15 @@ void ColumnVariant::insertFrom(const IColumn & src_, size_t n) else { getOffsets().push_back(variants[local_discr]->size()); - variants[local_discr]->insertFrom(src.getVariantByGlobalDiscriminator(global_discr), src.offsetAt(n)); + variants[local_discr]->insertFrom(src.getVariantByGlobalDiscriminator(src_global_discr), src.offsetAt(n)); } } -void ColumnVariant::insertIntoVariant(const DB::Field & x, Discriminator global_discr) -{ - if (global_discr > variants.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid global discriminator: {}. The number of variants is {}", size_t(global_discr), variants.size()); - auto & variant = getVariantByGlobalDiscriminator(global_discr); - variant.insert(x); - getLocalDiscriminators().push_back(localDiscriminatorByGlobal(global_discr)); - getOffsets().push_back(variant.size() - 1); -} - -void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping) { const size_t num_variants = variants.size(); const auto & src = assert_cast(src_); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); if (start + length > src.getLocalDiscriminators().size()) @@ -507,7 +498,12 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l /// In this case we can simply call insertRangeFrom on this single variant. 
if (auto non_empty_src_local_discr = src.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) { - auto local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(*non_empty_src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(*non_empty_src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); size_t offset = variants[local_discr]->size(); variants[local_discr]->insertRangeFrom(*src.variants[*non_empty_src_local_discr], start, length); getLocalDiscriminators().resize_fill(local_discriminators->size() + length, local_discr); @@ -522,7 +518,7 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l /// collect ranges we need to insert for all variants and update offsets. /// nested_ranges[i].first - offset in src.variants[i] /// nested_ranges[i].second - length in src.variants[i] - std::vector> nested_ranges(num_variants, {0, 0}); + std::vector> nested_ranges(src.variants.size(), {0, 0}); auto & offsets_data = getOffsets(); offsets_data.reserve(offsets_data.size() + length); auto & local_discriminators_data = getLocalDiscriminators(); @@ -533,7 +529,11 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l { /// We insert from src.variants[src_local_discr] to variants[local_discr] Discriminator src_local_discr = src_local_discriminators_data[i]; - Discriminator local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); local_discriminators_data.push_back(local_discr); if (local_discr == NULL_DISCRIMINATOR) { @@ -553,22 +553,29 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l for (size_t src_local_discr = 0; src_local_discr != nested_ranges.size(); ++src_local_discr) { auto [nested_start, nested_length] = nested_ranges[src_local_discr]; - auto local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); if (nested_length) variants[local_discr]->insertRangeFrom(*src.variants[src_local_discr], nested_start, nested_length); } } -void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position, size_t length, const std::vector * global_discriminators_mapping) { const size_t num_variants = variants.size(); const auto & src = assert_cast(src_); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); - 
/// Remember that src column can have different local variants order. Discriminator src_local_discr = src.localDiscriminatorAt(position); - Discriminator local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); auto & local_discriminators_data = getLocalDiscriminators(); local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); @@ -588,6 +595,72 @@ void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, si } } +void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +{ + insertFromImpl(src_, n, nullptr); +} + +void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +{ + insertRangeFromImpl(src_, start, length, nullptr); +} + +void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +{ + insertManyFromImpl(src_, position, length, nullptr); +} + +void ColumnVariant::insertFrom(const DB::IColumn & src_, size_t n, const std::vector & global_discriminators_mapping) +{ + insertFromImpl(src_, n, &global_discriminators_mapping); +} + +void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping) +{ + insertRangeFromImpl(src_, start, length, &global_discriminators_mapping); +} + +void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length, const std::vector & global_discriminators_mapping) +{ + insertManyFromImpl(src_, position, length, &global_discriminators_mapping); +} + +void ColumnVariant::insertIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t n) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + getLocalDiscriminators().push_back(local_discr); + getOffsets().push_back(variants[local_discr]->size()); + variants[local_discr]->insertFrom(src_, n); +} + +void ColumnVariant::insertRangeIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t start, size_t length) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + auto & local_discriminators_data = getLocalDiscriminators(); + local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); + auto & offsets_data = getOffsets(); + size_t offset = variants[local_discr]->size(); + offsets_data.reserve(offsets_data.size() + length); + for (size_t i = 0; i != length; ++i) + offsets_data.push_back(offset + i); + + variants[local_discr]->insertRangeFrom(src_, start, length); +} + +void ColumnVariant::insertManyIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t position, size_t length) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + auto & local_discriminators_data = getLocalDiscriminators(); + local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); + auto & offsets_data = getOffsets(); + size_t offset = variants[local_discr]->size(); + offsets_data.reserve(offsets_data.size() + length); + for (size_t i = 0; i != length; ++i) + offsets_data.push_back(offset + i); + 
+ variants[local_discr]->insertManyFrom(src_, position, length); +} + void ColumnVariant::insertDefault() { getLocalDiscriminators().push_back(NULL_DISCRIMINATOR); @@ -678,6 +751,14 @@ const char * ColumnVariant::deserializeAndInsertFromArena(const char * pos) return variants[local_discr]->deserializeAndInsertFromArena(pos); } +const char * ColumnVariant::deserializeVariantAndInsertFromArena(DB::ColumnVariant::Discriminator global_discr, const char * pos) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + getLocalDiscriminators().push_back(local_discr); + getOffsets().push_back(variants[local_discr]->size()); + return variants[local_discr]->deserializeAndInsertFromArena(pos); +} + const char * ColumnVariant::skipSerializedInArena(const char * pos) const { Discriminator global_discr = unalignedLoad(pos); @@ -1426,4 +1507,54 @@ void ColumnVariant::applyNullMapImpl(const ColumnVector::Container & null } } +void ColumnVariant::extend(const std::vector & old_to_new_global_discriminators, std::vector> && new_variants_and_discriminators) +{ + /// Update global discriminators for current variants. + for (Discriminator & global_discr : local_to_global_discriminators) + global_discr = old_to_new_global_discriminators[global_discr]; + + /// Add new variants. + variants.reserve(variants.size() + new_variants_and_discriminators.size()); + local_to_global_discriminators.reserve(local_to_global_discriminators.size() + new_variants_and_discriminators.size()); + for (auto & new_variant_and_discriminator : new_variants_and_discriminators) + { + variants.emplace_back(std::move(new_variant_and_discriminator.first)); + local_to_global_discriminators.push_back(new_variant_and_discriminator.second); + } + + /// Update global -> local discriminators matching. 
+ global_to_local_discriminators.resize(local_to_global_discriminators.size()); + for (Discriminator local_discr = 0; local_discr != local_to_global_discriminators.size(); ++local_discr) + global_to_local_discriminators[local_to_global_discriminators[local_discr]] = local_discr; +} + +bool ColumnVariant::hasDynamicStructure() const +{ + for (const auto & variant : variants) + { + if (variant->hasDynamicStructure()) + return true; + } + + return false; +} + +void ColumnVariant::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + std::vector<Columns> variants_source_columns; + variants_source_columns.resize(variants.size()); + for (size_t i = 0; i != variants.size(); ++i) + variants_source_columns[i].reserve(source_columns.size()); + + for (const auto & source_column : source_columns) + { + const auto & source_variants = assert_cast<const ColumnVariant &>(*source_column).variants; + for (size_t i = 0; i != source_variants.size(); ++i) + variants_source_columns[i].push_back(source_variants[i]); + } + + for (size_t i = 0; i != variants.size(); ++i) + variants[i]->takeDynamicStructureFromSourceColumns(variants_source_columns[i]); +} + } diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 4aa2c9058cc..8f703ea17d9 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -175,18 +175,32 @@ public: bool isDefaultAt(size_t n) const override; bool isNullAt(size_t n) const override; StringRef getDataAt(size_t n) const override; + void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertIntoVariant(const Field & x, Discriminator global_discr); + void insertFrom(const IColumn & src_, size_t n) override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; - void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; + void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; + + /// Methods for insertion from another Variant but with known mapping between global discriminators. + void insertFrom(const IColumn & src_, size_t n, const std::vector<Discriminator> & global_discriminators_mapping); + void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<Discriminator> & global_discriminators_mapping); + void insertManyFrom(const IColumn & src_, size_t position, size_t length, const std::vector<Discriminator> & global_discriminators_mapping); + + /// Methods for insertion into a specific variant.
+ void insertIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t n); + void insertRangeIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t start, size_t length); + void insertManyIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t position, size_t length); + void insertDefault() override; void insertManyDefaults(size_t length) override; + void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; const char * deserializeAndInsertFromArena(const char * pos) override; + const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos); const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; @@ -234,6 +248,8 @@ public: ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) { return variants[discr]; } ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; } + const NestedColumns & getVariants() const { return variants; } + const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; } IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; } @@ -282,7 +298,19 @@ public: void applyNullMap(const ColumnVector::Container & null_map); void applyNegatedNullMap(const ColumnVector::Container & null_map); + /// Extend current column with new variants. Change global discriminators of current variants to the new + /// according to the mapping and add new variants with new global discriminators. + /// This extension doesn't rewrite any data, just adds new empty variants and modifies global/local discriminators matching. + void extend(const std::vector & old_to_new_global_discriminators, std::vector> && new_variants_and_discriminators); + + bool hasDynamicStructure() const override; + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: + void insertFromImpl(const IColumn & src_, size_t n, const std::vector * global_discriminators_mapping); + void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping); + void insertManyFromImpl(const IColumn & src_, size_t position, size_t length, const std::vector * global_discriminators_mapping); + void initIdentityGlobalToLocalDiscriminatorsMapping(); template diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 18974e49760..479fd7de1bc 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -461,6 +462,7 @@ template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; +template class IColumnHelper; template class IColumnHelper; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index cea8d7c9f55..33f398474ed 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -534,6 +534,8 @@ public: return res; } + virtual bool hasDynamicStructure() const { return false; } + virtual void takeDynamicStructureFromSourceColumns(const std::vector & /*source_columns*/) {} /** Some columns can contain another columns inside. * So, we have a tree of columns. But not all combinations are possible. 
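Before the unit tests, a minimal sketch of the new mapping-based insert API (not part of the patch; the two variant sets and the column names are assumed for illustration). It shows how a caller that already knows how the source's global discriminators map onto the destination's can copy rows without per-row type resolution:

    /// src: Variant(Int8, String)          -> globals {Int8: 0, String: 1}
    /// dst: Variant(Float64, Int8, String) -> globals {Float64: 0, Int8: 1, String: 2}
    /// mapping[src_global] == dst_global:
    std::vector<ColumnVariant::Discriminator> mapping = {1, 2};
    dst.insertRangeFrom(src, 0, src.size(), mapping);
    /// Rows with NULL_DISCRIMINATOR pass through unchanged; for all other rows
    /// insertRangeFromImpl remaps the source global discriminator first and only
    /// then resolves it to a local discriminator in dst.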
diff --git a/src/Columns/tests/gtest_column_dynamic.cpp b/src/Columns/tests/gtest_column_dynamic.cpp new file mode 100644 index 00000000000..4c209f7d8a9 --- /dev/null +++ b/src/Columns/tests/gtest_column_dynamic.cpp @@ -0,0 +1,652 @@ +#include +#include +#include +#include + +using namespace DB; + +TEST(ColumnDynamic, CreateEmpty) +{ + auto column = ColumnDynamic::create(255); + ASSERT_TRUE(column->empty()); + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()"); + ASSERT_TRUE(column->getVariantInfo().variant_names.empty()); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty()); +} + +TEST(ColumnDynamic, InsertDefault) +{ + auto column = ColumnDynamic::create(255); + column->insertDefault(); + ASSERT_TRUE(column->size() == 1); + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()"); + ASSERT_TRUE(column->getVariantInfo().variant_names.empty()); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty()); + ASSERT_TRUE(column->isNullAt(0)); + ASSERT_EQ((*column)[0], Field(Null())); +} + +TEST(ColumnDynamic, InsertFields) +{ + auto column = ColumnDynamic::create(255); + column->insert(Field(42)); + column->insert(Field(-42)); + column->insert(Field("str1")); + column->insert(Field(Null())); + column->insert(Field(42.42)); + column->insert(Field(43)); + column->insert(Field(-43)); + column->insert(Field("str2")); + column->insert(Field(Null())); + column->insert(Field(43.43)); + ASSERT_TRUE(column->size() == 10); + + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)"); + std::vector expected_names = {"Float64", "Int8", "String"}; + ASSERT_EQ(column->getVariantInfo().variant_names, expected_names); + std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}}; + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +ColumnDynamic::MutablePtr getDynamicWithManyVariants(size_t num_variants, Field tuple_element = Field(42)) +{ + auto column = ColumnDynamic::create(255); + for (size_t i = 0; i != num_variants; ++i) + { + Tuple tuple; + for (size_t j = 0; j != i + 1; ++j) + tuple.push_back(tuple_element); + column->insert(tuple); + } + + return column; +} + +TEST(ColumnDynamic, InsertFieldsOverflow1) +{ + auto column = getDynamicWithManyVariants(253); + + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 253); + + column->insert(Field(42.42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + + column->insert(Field(42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + Field field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "42"); + + column->insert(Field(43)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "43"); + + column->insert(Field("str1")); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + 
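    /// Once all 255 discriminators are taken, the shared String variant acts as a fallback: values whose type didn't get its own variant are stored as formatted strings (an Int8 42 reads back as "42", an Array as "[42, 43]").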
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "str1"); + + column->insert(Field(Array({Field(42), Field(43)}))); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "[42, 43]"); +} + +TEST(ColumnDynamic, InsertFieldsOverflow2) +{ + auto column = getDynamicWithManyVariants(254); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); + + column->insert(Field("str1")); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + + column->insert(Field(42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + Field field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "42"); +} + +ColumnDynamic::MutablePtr getInsertFromColumn(size_t num = 1) +{ + auto column_from = ColumnDynamic::create(255); + for (size_t i = 0; i != num; ++i) + { + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + } + return column_from; +} + +void checkInsertFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertFrom(*column_from, 0); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42.42); + + column_to->insertFrom(*column_from, 2); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, InsertFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + column_to->insert(Array({Field(42)})); + + 
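    /// The Array(Int8) row inserted above contributes a fourth variant; variant names are kept in sorted order, so Array(Int8) takes global discriminator 0 and Float64/Int8/String shift to 1/2/3.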
checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertFrom(*column_from, 0); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); + + column_to->insertFrom(*column_from, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); +} + +TEST(ColumnDynamic, InsertFromOverflow2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertFrom(*column_from, 0); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); +} + +void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42.42); + + column_to->insertManyFrom(*column_from, 2, 2); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "str"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + 
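/// The InsertManyFrom scenarios below mirror the InsertFrom ones: the destination starts empty, already shares the source's variants, or holds an extra Array(Int8) variant that reorders the global discriminators.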
+TEST(ColumnDynamic, InsertManyFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertManyFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertManyFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + column_to->insert(Array({Field(42)})); + + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertManyFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "42.42"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); + + column_to->insertManyFrom(*column_from, 2, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "str"); +} + +TEST(ColumnDynamic, InsertManyFromOverflow2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "42.42"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 
"42.42"); +} + +void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + column_to->insertRangeFrom(*column_from, 3, 3); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, InsertRangeFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertRangeFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str1")); + + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertRangeFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str1")); + column_to->insert(Array({Field(42)})); + + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 0, 4); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow2) +{ + auto column_from = 
ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("42.42")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow3) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insert(Field("Str")); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("42.42")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow4) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(254); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field("42")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow5) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insert(Field("str")); + column_to->insertRangeFrom(*column_from, 0, 4); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 3]; + 
ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow6) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(44)); + column_from->insert(Field(42.42)); + column_from->insert(Field(43.43)); + column_from->insert(Field("str")); + column_from->insert(Field(Array({Field(42)}))); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 2, 5); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + auto field = (*column_to)[column_to->size() - 5]; + + ASSERT_EQ(field, Field("44")); + field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42.42)); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43.43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("str")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("[42]")); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArena1) +{ + auto column = ColumnDynamic::create(255); + column->insert(Field(42)); + column->insert(Field(42.42)); + column->insert(Field("str")); + column->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column->serializeValueIntoArena(0, arena, pos); + column->serializeValueIntoArena(1, arena, pos); + column->serializeValueIntoArena(2, arena, pos); + column->serializeValueIntoArena(3, arena, pos); + pos = column->deserializeAndInsertFromArena(ref1.data); + pos = column->deserializeAndInsertFromArena(pos); + pos = column->deserializeAndInsertFromArena(pos); + column->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column)[column->size() - 4], 42); + ASSERT_EQ((*column)[column->size() - 3], 42.42); + ASSERT_EQ((*column)[column->size() - 2], "str"); + ASSERT_EQ((*column)[column->size() - 1], Null()); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArena2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + column_from->serializeValueIntoArena(3, arena, pos); + + auto column_to = ColumnDynamic::create(255); + pos = column_to->deserializeAndInsertFromArena(ref1.data); + pos = column_to->deserializeAndInsertFromArena(pos); + pos = column_to->deserializeAndInsertFromArena(pos); + column_to->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column_from)[column_from->size() - 4], 42); + ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42); + ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); + ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, 
Int8, String)"); + std::vector expected_names = {"Float64", "Int8", "String"}; + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}}; + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + column_from->serializeValueIntoArena(3, arena, pos); + + auto column_to = getDynamicWithManyVariants(253); + pos = column_to->deserializeAndInsertFromArena(ref1.data); + pos = column_to->deserializeAndInsertFromArena(pos); + pos = column_to->deserializeAndInsertFromArena(pos); + column_to->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column_from)[column_from->size() - 4], 42); + ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42); + ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); + ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); +} + +TEST(ColumnDynamic, skipSerializedInArena) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + auto ref4 = column_from->serializeValueIntoArena(3, arena, pos); + + const char * end = ref4.data + ref4.size; + auto column_to = ColumnDynamic::create(255); + pos = column_to->skipSerializedInArena(ref1.data); + pos = column_to->skipSerializedInArena(pos); + pos = column_to->skipSerializedInArena(pos); + pos = column_to->skipSerializedInArena(pos); + + ASSERT_EQ(pos, end); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.empty()); + ASSERT_TRUE(column_to->getVariantInfo().variant_names.empty()); +} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 84e709294aa..7176c4d8850 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -871,6 +871,7 @@ class IColumn; M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \ M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \ M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. 
It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ + M(Bool, cast_string_to_dynamic_use_inference, false, "Use type inference during String to Dynamic conversion", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ @@ -879,6 +880,7 @@ class IColumn; M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ + M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ diff --git a/src/Core/TypeId.h b/src/Core/TypeId.h index 7003e880cd5..26d9ab8595b 100644 --- a/src/Core/TypeId.h +++ b/src/Core/TypeId.h @@ -50,6 +50,7 @@ enum class TypeIndex IPv6, JSONPaths, Variant, + Dynamic }; /** diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index 6e5760933eb..806a1577a21 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -75,6 +75,27 @@ void DataTypeArray::forEachChild(const ChildCallback & callback) const nested->forEachChild(callback); } +std::unique_ptr<IDataType::SubstreamData> DataTypeArray::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const +{ + auto nested_type = assert_cast<const DataTypeArray &>(*data.type).nested; + auto nested_data = std::make_unique<SubstreamData>(nested_type->getDefaultSerialization()); + nested_data->type = nested_type; + nested_data->column = data.column ? assert_cast<const ColumnArray &>(*data.column).getDataPtr() : nullptr; + + auto nested_subcolumn_data = nested_type->getSubcolumnData(subcolumn_name, *nested_data, throw_if_null); + if (!nested_subcolumn_data) + return nullptr; + + auto creator = SerializationArray::SubcolumnCreator(data.column ? 
assert_cast(*data.column).getOffsetsPtr() : nullptr); + auto res = std::make_unique(); + res->serialization = creator.create(nested_subcolumn_data->serialization); + res->type = creator.create(nested_subcolumn_data->type); + if (data.column) + res->column = creator.create(nested_subcolumn_data->column); + + return res; +} + static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.size() != 1) diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 4423f137e1a..b242d871c36 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -55,7 +55,12 @@ public: bool textCanContainOnlyValidUTF8() const override { return nested->textCanContainOnlyValidUTF8(); } bool isComparable() const override { return nested->isComparable(); } bool canBeComparedWithCollation() const override { return nested->canBeComparedWithCollation(); } - bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); } + bool hasDynamicSubcolumnsDeprecated() const override { return nested->hasDynamicSubcolumnsDeprecated(); } + + /// Array column doesn't have subcolumns by itself but allows to read subcolumns of nested column. + /// If nested column has dynamic subcolumns, Array of this type should also be able to read these dynamic subcolumns. + bool hasDynamicSubcolumnsData() const override { return nested->hasDynamicSubcolumnsData(); } + std::unique_ptr getDynamicSubcolumnData(std::string_view subcolumn_name, const SubstreamData & data, bool throw_if_null) const override; bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp new file mode 100644 index 00000000000..2c6b3eba906 --- /dev/null +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNEXPECTED_AST_STRUCTURE; +} + +DataTypeDynamic::DataTypeDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) +{ +} + +MutableColumnPtr DataTypeDynamic::createColumn() const +{ + return ColumnDynamic::create(max_dynamic_types); +} + +String DataTypeDynamic::doGetName() const +{ + if (max_dynamic_types == DEFAULT_MAX_DYNAMIC_TYPES) + return "Dynamic"; + return "Dynamic(max_types=" + toString(max_dynamic_types) + ")"; +} + +Field DataTypeDynamic::getDefault() const +{ + return Field(Null()); +} + +SerializationPtr DataTypeDynamic::doGetDefaultSerialization() const +{ + return std::make_shared(max_dynamic_types); +} + +static DataTypePtr create(const ASTPtr & arguments) +{ + if (!arguments || arguments->children.empty()) + return std::make_shared(); + + if (arguments->children.size() > 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Dynamic data type can have only one optional argument - the maximum number of dynamic types in a form 'Dynamic(max_types=N)"); + + + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->name != "equals") + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Dynamic data type argument should be in a form 'max_types=N'"); + + auto identifier_name = argument->arguments->children[0]->as()->name(); + if (identifier_name != "max_types") + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, 
"Unexpected identifier: {}. Dynamic data type argument should be in a form 'max_types=N'", identifier_name); + + auto literal = argument->arguments->children[1]->as(); + + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > 255) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); + + return std::make_shared(literal->value.get()); +} + +void registerDataTypeDynamic(DataTypeFactory & factory) +{ + factory.registerDataType("Dynamic", create); +} + +std::unique_ptr DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const +{ + auto [subcolumn_type_name, subcolumn_nested_name] = Nested::splitName(subcolumn_name); + /// Check if requested subcolumn is a valid data type. + auto subcolumn_type = DataTypeFactory::instance().tryGet(String(subcolumn_type_name)); + if (!subcolumn_type) + { + if (throw_if_null) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", subcolumn_type_name); + return nullptr; + } + + std::unique_ptr res = std::make_unique(subcolumn_type->getDefaultSerialization()); + res->type = subcolumn_type; + std::optional discriminator; + if (data.column) + { + /// If column was provided, we should extract subcolumn from Dynamic column. + const auto & dynamic_column = assert_cast(*data.column); + const auto & variant_info = dynamic_column.getVariantInfo(); + /// Check if provided Dynamic column has subcolumn of this type. + auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type->getName()); + if (it != variant_info.variant_name_to_discriminator.end()) + { + discriminator = it->second; + res->column = dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(*discriminator); + } + } + + /// Extract nested subcolumn of requested dynamic subcolumn if needed. + if (!subcolumn_nested_name.empty()) + { + res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null); + if (!res) + return nullptr; + } + + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName()); + res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + if (data.column) + { + if (discriminator) + { + /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to + /// create full subcolumn from variant according to discriminators. + const auto & variant_column = assert_cast(*data.column).getVariantColumn(); + auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator)); + res->column = creator.create(res->column); + } + else + { + /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. 
+ auto column = res->type->createColumn(); + column->insertManyDefaults(data.column->size()); + res->column = std::move(column); + } + } + + return res; +} + +} diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h new file mode 100644 index 00000000000..452e05061a0 --- /dev/null +++ b/src/DataTypes/DataTypeDynamic.h @@ -0,0 +1,53 @@ +#pragma once + +#include + +#define DEFAULT_MAX_DYNAMIC_TYPES 32 + + +namespace DB +{ + +class DataTypeDynamic final : public IDataType +{ +public: + static constexpr bool is_parametric = true; + + DataTypeDynamic(size_t max_dynamic_types_ = DEFAULT_MAX_DYNAMIC_TYPES); + + TypeIndex getTypeId() const override { return TypeIndex::Dynamic; } + const char * getFamilyName() const override { return "Dynamic"; } + + bool isParametric() const override { return true; } + bool canBeInsideNullable() const override { return false; } + bool supportsSparseSerialization() const override { return false; } + bool canBeInsideSparseColumns() const override { return false; } + bool isComparable() const override { return true; } + + MutableColumnPtr createColumn() const override; + + Field getDefault() const override; + + bool equals(const IDataType & rhs) const override + { + if (const auto * rhs_dynamic_type = typeid_cast(&rhs)) + return max_dynamic_types == rhs_dynamic_type->max_dynamic_types; + return false; + } + + bool haveSubtypes() const override { return false; } + + bool hasDynamicSubcolumnsData() const override { return true; } + std::unique_ptr getDynamicSubcolumnData(std::string_view subcolumn_name, const SubstreamData & data, bool throw_if_null) const override; + + size_t getMaxDynamicTypes() const { return max_dynamic_types; } + +private: + SerializationPtr doGetDefaultSerialization() const override; + String doGetName() const override; + + size_t max_dynamic_types; +}; + +} + diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 844384f3c95..a94526dce60 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -292,6 +292,7 @@ DataTypeFactory::DataTypeFactory() registerDataTypeMap(*this); registerDataTypeObject(*this); registerDataTypeVariant(*this); + registerDataTypeDynamic(*this); } DataTypeFactory & DataTypeFactory::instance() diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 4727cb3ae5c..86e0203358d 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -100,5 +100,6 @@ void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory); void registerDataTypeDomainGeo(DataTypeFactory & factory); void registerDataTypeObject(DataTypeFactory & factory); void registerDataTypeVariant(DataTypeFactory & factory); +void registerDataTypeDynamic(DataTypeFactory & factory); } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 7281cca1bb1..4866c3e78cc 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -42,7 +42,7 @@ public: bool isComparable() const override { return key_type->isComparable() && value_type->isComparable(); } bool isParametric() const override { return true; } bool haveSubtypes() const override { return true; } - bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); } + bool hasDynamicSubcolumnsDeprecated() const override { return nested->hasDynamicSubcolumnsDeprecated(); } const DataTypePtr & getKeyType() const { return key_type; } const DataTypePtr & getValueType() const { return value_type; } diff --git 
a/src/DataTypes/DataTypeObject.h b/src/DataTypes/DataTypeObject.h index 937a9091371..c610a1a8ba4 100644 --- a/src/DataTypes/DataTypeObject.h +++ b/src/DataTypes/DataTypeObject.h @@ -36,7 +36,7 @@ public: bool haveSubtypes() const override { return false; } bool equals(const IDataType & rhs) const override; bool isParametric() const override { return true; } - bool hasDynamicSubcolumns() const override { return true; } + bool hasDynamicSubcolumnsDeprecated() const override { return true; } SerializationPtr doGetDefaultSerialization() const override; diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 5bbd79160d4..71347011658 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -291,9 +291,9 @@ bool DataTypeTuple::haveMaximumSizeOfValue() const return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); }); } -bool DataTypeTuple::hasDynamicSubcolumns() const +bool DataTypeTuple::hasDynamicSubcolumnsDeprecated() const { - return std::any_of(elems.begin(), elems.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); }); + return std::any_of(elems.begin(), elems.end(), [](auto && elem) { return elem->hasDynamicSubcolumnsDeprecated(); }); } bool DataTypeTuple::isComparable() const diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 15561fe4286..fd00fce5a17 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -52,7 +52,7 @@ public: bool isComparable() const override; bool textCanContainOnlyValidUTF8() const override; bool haveMaximumSizeOfValue() const override; - bool hasDynamicSubcolumns() const override; + bool hasDynamicSubcolumnsDeprecated() const override; size_t getMaximumSizeOfValueInMemory() const override; size_t getSizeOfValueInMemory() const override; diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index db96972c00f..b918b79a2ed 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -33,6 +33,9 @@ DataTypeVariant::DataTypeVariant(const DataTypes & variants_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nullable/LowCardinality(Nullable) types are not allowed inside Variant type"); if (type->getTypeId() == TypeIndex::Variant) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nested Variant types are not allowed"); + if (type->getTypeId() == TypeIndex::Dynamic) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dynamic type is not allowed inside Variant type"); + /// Don't use Nothing type as a variant. if (!isNothing(type)) name_to_type[type->getName()] = type; @@ -42,9 +45,6 @@ DataTypeVariant::DataTypeVariant(const DataTypes & variants_) for (const auto & [_, type] : name_to_type) variants.push_back(type); - if (variants.empty()) - throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty"); - if (variants.size() > ColumnVariant::MAX_NESTED_COLUMNS) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Variant type with more than {} nested types is not allowed", ColumnVariant::MAX_NESTED_COLUMNS); } @@ -113,9 +113,16 @@ bool DataTypeVariant::equals(const IDataType & rhs) const return false; for (size_t i = 0; i < size; ++i) + { if (!variants[i]->equals(*rhs_variant.variants[i])) return false; + /// The same data types with different custom names considered different. + /// For example, UInt8 and Bool. 
+ if ((variants[i]->hasCustomName() || rhs_variant.variants[i]->hasCustomName()) && variants[i]->getName() != rhs_variant.variants[i]->getName()) + return false; + } + return true; } @@ -129,17 +136,15 @@ bool DataTypeVariant::haveMaximumSizeOfValue() const return std::all_of(variants.begin(), variants.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); }); } -bool DataTypeVariant::hasDynamicSubcolumns() const +bool DataTypeVariant::hasDynamicSubcolumnsDeprecated() const { - return std::any_of(variants.begin(), variants.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); }); + return std::any_of(variants.begin(), variants.end(), [](auto && elem) { return elem->hasDynamicSubcolumnsDeprecated(); }); } -std::optional<ColumnVariant::Discriminator> DataTypeVariant::tryGetVariantDiscriminator(const IDataType & type) const +std::optional<ColumnVariant::Discriminator> DataTypeVariant::tryGetVariantDiscriminator(const String & type_name) const { - String type_name = type.getName(); for (size_t i = 0; i != variants.size(); ++i) { - /// We don't use equals here, because it doesn't respect custom type names. if (variants[i]->getName() == type_name) return i; } @@ -187,7 +192,7 @@ void DataTypeVariant::forEachChild(const DB::IDataType::ChildCallback & callback static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) - throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty"); + return std::make_shared<DataTypeVariant>(DataTypes{}); DataTypes nested_types; nested_types.reserve(arguments->children.size()); diff --git a/src/DataTypes/DataTypeVariant.h b/src/DataTypes/DataTypeVariant.h index dadc85ac3b3..1b561a083b1 100644 --- a/src/DataTypes/DataTypeVariant.h +++ b/src/DataTypes/DataTypeVariant.h @@ -45,14 +45,14 @@ public: bool haveSubtypes() const override { return true; } bool textCanContainOnlyValidUTF8() const override; bool haveMaximumSizeOfValue() const override; - bool hasDynamicSubcolumns() const override; + bool hasDynamicSubcolumnsDeprecated() const override; size_t getMaximumSizeOfValueInMemory() const override; const DataTypePtr & getVariant(size_t i) const { return variants[i]; } const DataTypes & getVariants() const { return variants; } /// Check if Variant has provided type in the list of variants and return its discriminator. - std::optional<ColumnVariant::Discriminator> tryGetVariantDiscriminator(const IDataType & type) const; + std::optional<ColumnVariant::Discriminator> tryGetVariantDiscriminator(const String & type_name) const; void forEachChild(const ChildCallback & callback) const override; diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 344b81be960..1c9715bbf53 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -101,14 +101,12 @@ void IDataType::forEachSubcolumn( data.serialization->enumerateStreams(settings, callback_with_data, data); } -template <typename Ptr> -Ptr IDataType::getForSubcolumn( +std::unique_ptr<IDataType::SubstreamData> IDataType::getSubcolumnData( std::string_view subcolumn_name, const SubstreamData & data, - Ptr SubstreamData::*member, - bool throw_if_null) const + bool throw_if_null) { - Ptr res; + std::unique_ptr<SubstreamData> res; ISerialization::StreamCallback callback_with_data = [&](const auto & subpath) { @@ -120,7 +118,29 @@ Ptr IDataType::getForSubcolumn( auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len); /// Create data from path only if it's requested subcolumn. 
if (name == subcolumn_name) - res = ISerialization::createFromPath(subpath, prefix_len).*member; + { + res = std::make_unique(ISerialization::createFromPath(subpath, prefix_len)); + } + /// Check if this subcolumn is a prefix of requested subcolumn and it can create dynamic subcolumns. + else if (subcolumn_name.starts_with(name + ".") && subpath[i].data.type && subpath[i].data.type->hasDynamicSubcolumnsData()) + { + auto dynamic_subcolumn_name = subcolumn_name.substr(name.size() + 1); + auto dynamic_subcolumn_data = subpath[i].data.type->getDynamicSubcolumnData(dynamic_subcolumn_name, subpath[i].data, false); + if (dynamic_subcolumn_data) + { + /// Create requested subcolumn using dynamic subcolumn data. + auto tmp_subpath = subpath; + if (tmp_subpath[i].creator) + { + dynamic_subcolumn_data->type = tmp_subpath[i].creator->create(dynamic_subcolumn_data->type); + dynamic_subcolumn_data->column = tmp_subpath[i].creator->create(dynamic_subcolumn_data->column); + dynamic_subcolumn_data->serialization = tmp_subpath[i].creator->create(dynamic_subcolumn_data->serialization); + } + + tmp_subpath[i].data = *dynamic_subcolumn_data; + res = std::make_unique(ISerialization::createFromPath(tmp_subpath, prefix_len)); + } + } } subpath[i].visited = true; } @@ -130,8 +150,11 @@ Ptr IDataType::getForSubcolumn( settings.position_independent_encoding = false; data.serialization->enumerateStreams(settings, callback_with_data, data); + if (!res && data.type->hasDynamicSubcolumnsData()) + return data.type->getDynamicSubcolumnData(subcolumn_name, data, throw_if_null); + if (!res && throw_if_null) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, data.type->getName()); return res; } @@ -141,34 +164,51 @@ bool IDataType::hasSubcolumn(std::string_view subcolumn_name) const return tryGetSubcolumnType(subcolumn_name) != nullptr; } +bool IDataType::hasDynamicSubcolumns() const +{ + if (hasDynamicSubcolumnsData()) + return true; + + bool has_dynamic_subcolumns = false; + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); + auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data) + { + has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData(); + }; + forEachSubcolumn(callback, data); + return has_dynamic_subcolumns; +} + DataTypePtr IDataType::tryGetSubcolumnType(std::string_view subcolumn_name) const { auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::type, false); + auto subcolumn_data = getSubcolumnData(subcolumn_name, data, false); + return subcolumn_data ? 
subcolumn_data->type : nullptr; } DataTypePtr IDataType::getSubcolumnType(std::string_view subcolumn_name) const { auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::type, true); + return getSubcolumnData(subcolumn_name, data, true)->type; } ColumnPtr IDataType::tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const { - auto data = SubstreamData(getDefaultSerialization()).withColumn(column); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::column, false); + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()).withColumn(column); + auto subcolumn_data = getSubcolumnData(subcolumn_name, data, false); + return subcolumn_data ? subcolumn_data->column : nullptr; } ColumnPtr IDataType::getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const { - auto data = SubstreamData(getDefaultSerialization()).withColumn(column); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::column, true); + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()).withColumn(column); + return getSubcolumnData(subcolumn_name, data, true)->column; } SerializationPtr IDataType::getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const { - auto data = SubstreamData(serialization); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::serialization, true); + auto data = SubstreamData(serialization).withType(getPtr()); + return getSubcolumnData(subcolumn_name, data, true)->serialization; } Names IDataType::getSubcolumnNames() const @@ -323,6 +363,7 @@ bool isMap(TYPE data_type) {return WhichDataType(data_type).isMap(); } \ bool isInterval(TYPE data_type) {return WhichDataType(data_type).isInterval(); } \ bool isObject(TYPE data_type) { return WhichDataType(data_type).isObject(); } \ bool isVariant(TYPE data_type) { return WhichDataType(data_type).isVariant(); } \ +bool isDynamic(TYPE data_type) { return WhichDataType(data_type).isDynamic(); } \ bool isNothing(TYPE data_type) { return WhichDataType(data_type).isNothing(); } \ \ bool isColumnedAsNumber(TYPE data_type) \ diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index eaf798a3017..dde61ca3a48 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -311,8 +311,13 @@ public: /// Strings, Numbers, Date, DateTime, Nullable virtual bool canBeInsideLowCardinality() const { return false; } - /// Object, Array(Object), Tuple(..., Object, ...) - virtual bool hasDynamicSubcolumns() const { return false; } + /// Checks for deprecated Object type usage recursively: Object, Array(Object), Tuple(..., Object, ...) + virtual bool hasDynamicSubcolumnsDeprecated() const { return false; } + + /// Checks if column has dynamic subcolumns. + virtual bool hasDynamicSubcolumns() const; + /// Checks if column can create dynamic subcolumns data and getDynamicSubcolumnData can be called. + virtual bool hasDynamicSubcolumnsData() const { return false; } /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column. 
static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint); @@ -329,16 +334,25 @@ protected: mutable SerializationPtr custom_serialization; public: + bool hasCustomName() const { return static_cast(custom_name.get()); } const IDataTypeCustomName * getCustomName() const { return custom_name.get(); } const ISerialization * getCustomSerialization() const { return custom_serialization.get(); } -private: - template - Ptr getForSubcolumn( +protected: + static std::unique_ptr getSubcolumnData( std::string_view subcolumn_name, const SubstreamData & data, - Ptr SubstreamData::*member, - bool throw_if_null) const; + bool throw_if_null); + + virtual std::unique_ptr getDynamicSubcolumnData( + std::string_view /*subcolumn_name*/, + const SubstreamData & /*data*/, + bool throw_if_null) const + { + if (throw_if_null) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDynamicSubcolumnData() is not implemented for type {}", getName()); + return nullptr; + } }; @@ -423,6 +437,7 @@ struct WhichDataType constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; } constexpr bool isVariant() const { return idx == TypeIndex::Variant; } + constexpr bool isDynamic() const { return idx == TypeIndex::Dynamic; } }; /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) @@ -483,6 +498,7 @@ bool isMap(TYPE data_type); \ bool isInterval(TYPE data_type); \ bool isObject(TYPE data_type); \ bool isVariant(TYPE data_type); \ +bool isDynamic(TYPE data_type); \ bool isNothing(TYPE data_type); \ \ bool isColumnedAsNumber(TYPE data_type); \ diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 99cf092e6cd..107e3a50025 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -177,7 +177,7 @@ static std::pair convertObjectColumnToTuple( static std::pair recursivlyConvertDynamicColumnToTuple( const ColumnPtr & column, const DataTypePtr & type) { - if (!type->hasDynamicSubcolumns()) + if (!type->hasDynamicSubcolumnsDeprecated()) return {column, type}; if (const auto * type_object = typeid_cast(type.get())) @@ -243,7 +243,7 @@ void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & sto { for (auto & column : block) { - if (!column.type->hasDynamicSubcolumns()) + if (!column.type->hasDynamicSubcolumnsDeprecated()) continue; std::tie(column.column, column.type) @@ -417,7 +417,7 @@ static DataTypePtr getLeastCommonTypeForTuple( static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl( const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths) { - if (!type_in_storage->hasDynamicSubcolumns()) + if (!type_in_storage->hasDynamicSubcolumnsDeprecated()) return type_in_storage; if (isObject(type_in_storage)) @@ -459,7 +459,7 @@ DataTypePtr getLeastCommonTypeForDynamicColumns( DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage) { - if (!type_in_storage->hasDynamicSubcolumns()) + if (!type_in_storage->hasDynamicSubcolumnsDeprecated()) return type_in_storage; if (isObject(type_in_storage)) @@ -494,7 +494,7 @@ bool hasDynamicSubcolumns(const ColumnsDescription & columns) return std::any_of(columns.begin(), columns.end(), [](const auto & column) { - return column.type->hasDynamicSubcolumns(); + return column.type->hasDynamicSubcolumnsDeprecated(); }); } @@ -1065,7 +1065,7 @@ Field FieldVisitorFoldDimension::operator()(const Null & x) const void setAllObjectsToDummyTupleType(NamesAndTypesList & columns) { for 
(auto & column : columns) - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) column.type = createConcreteEmptyDynamicColumn(column.type); } diff --git a/src/DataTypes/ObjectUtils.h b/src/DataTypes/ObjectUtils.h index 3e3b1b96740..6599d8adef1 100644 --- a/src/DataTypes/ObjectUtils.h +++ b/src/DataTypes/ObjectUtils.h @@ -194,7 +194,7 @@ ColumnsDescription getConcreteObjectColumns( /// dummy column will be removed. for (const auto & column : storage_columns) { - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) types_in_entries[column.name].push_back(createConcreteEmptyDynamicColumn(column.type)); } @@ -204,7 +204,7 @@ ColumnsDescription getConcreteObjectColumns( for (const auto & column : entry_columns) { auto storage_column = storage_columns.tryGetPhysical(column.name); - if (storage_column && storage_column->type->hasDynamicSubcolumns()) + if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) types_in_entries[column.name].push_back(column.type); } } diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index a3a28f8091c..dbe27a5f3f6 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -196,6 +196,8 @@ String getNameForSubstreamPath( stream_name += ".variant_offsets"; else if (it->type == Substream::VariantElement) stream_name += "." + it->variant_element_name; + else if (it->type == SubstreamType::DynamicStructure) + stream_name += ".dynamic_structure"; } return stream_name; @@ -271,6 +273,23 @@ ColumnPtr ISerialization::getFromSubstreamsCache(SubstreamsCache * cache, const return it == cache->end() ? nullptr : it->second; } +void ISerialization::addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state) +{ + if (!cache || path.empty()) + return; + + cache->emplace(getSubcolumnNameForStream(path), state); +} + +ISerialization::DeserializeBinaryBulkStatePtr ISerialization::getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path) +{ + if (!cache || path.empty()) + return nullptr; + + auto it = cache->find(getSubcolumnNameForStream(path)); + return it == cache->end() ? nullptr : it->second; +} + bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path) { for (const auto & elem : path) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index ebaa26d19a6..65493cf6dda 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -160,6 +160,9 @@ public: VariantElements, VariantElement, + DynamicData, + DynamicStructure, + Regular, }; @@ -231,6 +234,8 @@ public: using SerializeBinaryBulkStatePtr = std::shared_ptr; using DeserializeBinaryBulkStatePtr = std::shared_ptr; + using SubstreamsDeserializeStatesCache = std::unordered_map; + struct SerializeBinaryBulkSettings { OutputStreamGetter getter; @@ -240,6 +245,14 @@ public: bool low_cardinality_use_single_dictionary_for_part = true; bool position_independent_encoding = true; + + enum class DynamicStatisticsMode + { + NONE, /// Don't write statistics. + PREFIX, /// Write statistics in prefix. + SUFFIX, /// Write statistics in suffix. 
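+            /// PREFIX requires per-variant sizes to be known up front (they are taken
+            /// from an already formed column or from merge statistics), while SUFFIX
+            /// lets the serializer accumulate sizes while writing and emit them at the end.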
+        };
+        DynamicStatisticsMode dynamic_write_statistics = DynamicStatisticsMode::NONE;
     };
 
     struct DeserializeBinaryBulkSettings
@@ -256,6 +269,8 @@ public:
         /// If not zero, may be used to avoid reallocations while reading column of String type.
         double avg_value_size_hint = 0;
+
+        bool dynamic_read_statistics = false;
     };
 
     /// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark.
@@ -273,7 +288,8 @@ public:
     /// Call before deserializeBinaryBulkWithMultipleStreams chain to get DeserializeBinaryBulkStatePtr.
     virtual void deserializeBinaryBulkStatePrefix(
         DeserializeBinaryBulkSettings & /*settings*/,
-        DeserializeBinaryBulkStatePtr & /*state*/) const {}
+        DeserializeBinaryBulkStatePtr & /*state*/,
+        SubstreamsDeserializeStatesCache * /*cache*/) const {}
 
     /** 'offset' and 'limit' are used to specify range.
       * limit = 0 - means no limit.
@@ -393,6 +409,9 @@ public:
     static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column);
     static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path);
 
+    static void addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state);
+    static DeserializeBinaryBulkStatePtr getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path);
+
     static bool isSpecialCompressionAllowed(const SubstreamPath & path);
 
     static size_t getArrayLevel(const SubstreamPath & path);
diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp
index e8aab615849..d6546b338b5 100644
--- a/src/DataTypes/Serializations/SerializationArray.cpp
+++ b/src/DataTypes/Serializations/SerializationArray.cpp
@@ -284,10 +284,11 @@ void SerializationArray::serializeBinaryBulkStateSuffix(
 
 void SerializationArray::deserializeBinaryBulkStatePrefix(
     DeserializeBinaryBulkSettings & settings,
-    DeserializeBinaryBulkStatePtr & state) const
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsDeserializeStatesCache * cache) const
 {
     settings.path.push_back(Substream::ArrayElements);
-    nested->deserializeBinaryBulkStatePrefix(settings, state);
+    nested->deserializeBinaryBulkStatePrefix(settings, state, cache);
     settings.path.pop_back();
 }
 
diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h
index 82f5e8bce45..c3353f0c251 100644
--- a/src/DataTypes/Serializations/SerializationArray.h
+++ b/src/DataTypes/Serializations/SerializationArray.h
@@ -55,7 +55,8 @@ public:
     void deserializeBinaryBulkStatePrefix(
         DeserializeBinaryBulkSettings & settings,
-        DeserializeBinaryBulkStatePtr & state) const override;
+        DeserializeBinaryBulkStatePtr & state,
+        SubstreamsDeserializeStatesCache * cache) const override;
 
     void serializeBinaryBulkWithMultipleStreams(
         const IColumn & column,
@@ -71,7 +72,6 @@ public:
         DeserializeBinaryBulkStatePtr & state,
         SubstreamsCache * cache) const override;
 
-private:
     struct SubcolumnCreator : public ISubcolumnCreator
     {
         const ColumnPtr offsets;
diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp
new file mode 100644
index 00000000000..c9fe8dd6b29
--- /dev/null
+++ b/src/DataTypes/Serializations/SerializationDynamic.cpp
@@ -0,0 +1,645 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
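+/// Dynamic column is serialized into two kinds of substreams:
+///   - DynamicStructure ("<column>.dynamic_structure"): structure serialization
+///     version, the name of the internal Variant type and, depending on
+///     DynamicStatisticsMode, per-variant size statistics;
+///   - DynamicData: the ordinary substreams of the internal Variant column.
+///
+/// A sketch for a hypothetical column `d Dynamic` currently holding
+/// Variant(Int64, String) (the exact Variant stream names are produced by
+/// SerializationVariant, not here):
+///
+///     d.dynamic_structure      version, "Variant(Int64, String)"[, statistics]
+///     d.<variant substreams>   discriminators, offsets and per-variant data
+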
+namespace ErrorCodes
+{
+    extern const int INCORRECT_DATA;
+    extern const int LOGICAL_ERROR;
+}
+
+void SerializationDynamic::enumerateStreams(
+    EnumerateStreamsSettings & settings,
+    const StreamCallback & callback,
+    const SubstreamData & data) const
+{
+    settings.path.push_back(Substream::DynamicStructure);
+    callback(settings.path);
+    settings.path.pop_back();
+
+    const auto * column_dynamic = data.column ? &assert_cast<const ColumnDynamic &>(*data.column) : nullptr;
+
+    /// If column is nullptr, nothing to enumerate as we don't have any variants.
+    if (!column_dynamic)
+        return;
+
+    const auto & variant_info = column_dynamic->getVariantInfo();
+    auto variant_serialization = variant_info.variant_type->getDefaultSerialization();
+
+    settings.path.push_back(Substream::DynamicData);
+    auto variant_data = SubstreamData(variant_serialization)
+                            .withType(variant_info.variant_type)
+                            .withColumn(column_dynamic->getVariantColumnPtr())
+                            .withSerializationInfo(data.serialization_info);
+    settings.path.back().data = variant_data;
+    variant_serialization->enumerateStreams(settings, callback, variant_data);
+    settings.path.pop_back();
+}
+
+SerializationDynamic::DynamicStructureSerializationVersion::DynamicStructureSerializationVersion(UInt64 version) : value(static_cast<Value>(version))
+{
+    checkVersion(version);
+}
+
+void SerializationDynamic::DynamicStructureSerializationVersion::checkVersion(UInt64 version)
+{
+    if (version != VariantTypeName)
+        throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization.");
+}
+
+struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryBulkState
+{
+    SerializationDynamic::DynamicStructureSerializationVersion structure_version;
+    DataTypePtr variant_type;
+    Names variant_names;
+    SerializationPtr variant_serialization;
+    ISerialization::SerializeBinaryBulkStatePtr variant_state;
+
+    /// Pointer to currently serialized dynamic column.
+    /// Used to calculate statistics for the whole column and not for some range.
+    const ColumnDynamic * current_dynamic_column = nullptr;
+
+    /// Variants statistics. Map (Variant name) -> (Variant size).
+    ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ };
+
+    SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {}
+
+    void updateStatistics(const ColumnVariant & column_variant)
+    {
+        for (size_t i = 0; i != variant_names.size(); ++i)
+            statistics.data[variant_names[i]] += column_variant.getVariantPtrByGlobalDiscriminator(i)->size();
+    }
+};
+
+struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState
+{
+    SerializationPtr variant_serialization;
+    ISerialization::DeserializeBinaryBulkStatePtr variant_state;
+    ISerialization::DeserializeBinaryBulkStatePtr structure_state;
+};
+
+void SerializationDynamic::serializeBinaryBulkStatePrefix(
+    const DB::IColumn & column,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    const auto & column_dynamic = assert_cast<const ColumnDynamic &>(column);
+    const auto & variant_info = column_dynamic.getVariantInfo();
+
+    settings.path.push_back(Substream::DynamicStructure);
+    auto * stream = settings.getter(settings.path);
+    settings.path.pop_back();
+
+    if (!stream)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state prefix");
+
+    /// Write structure serialization version.
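+    /// The version is written as little-endian UInt64, so the prefix layout is
+    /// platform-independent; VariantTypeName is currently the only version and
+    /// means that the full name of the internal Variant type follows.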
+    UInt64 structure_version = DynamicStructureSerializationVersion::Value::VariantTypeName;
+    writeBinaryLittleEndian(structure_version, *stream);
+    auto dynamic_state = std::make_shared<SerializeBinaryBulkStateDynamic>(structure_version);
+
+    dynamic_state->variant_type = variant_info.variant_type;
+    dynamic_state->variant_names = variant_info.variant_names;
+    const auto & variant_column = column_dynamic.getVariantColumn();
+
+    /// Write internal Variant type name.
+    writeStringBinary(dynamic_state->variant_type->getName(), *stream);
+
+    /// Write statistics in prefix if needed.
+    if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX)
+    {
+        const auto & statistics = column_dynamic.getStatistics();
+        for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
+        {
+            size_t size = 0;
+            /// If there are no statistics or they were not collected during a merge,
+            /// we can use only variant sizes from the current column.
+            if (statistics.data.empty() || statistics.source != ColumnDynamic::Statistics::Source::MERGE)
+                size = variant_column.getVariantByGlobalDiscriminator(i).size();
+            /// Otherwise use the statistics stored in the column, calculated during merge.
+            else
+                size = statistics.data.at(variant_info.variant_names[i]);
+            writeVarUInt(size, *stream);
+        }
+    }
+
+    dynamic_state->variant_serialization = dynamic_state->variant_type->getDefaultSerialization();
+    settings.path.push_back(Substream::DynamicData);
+    dynamic_state->variant_serialization->serializeBinaryBulkStatePrefix(variant_column, settings, dynamic_state->variant_state);
+    settings.path.pop_back();
+
+    state = std::move(dynamic_state);
+}
+
+void SerializationDynamic::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsDeserializeStatesCache * cache) const
+{
+    DeserializeBinaryBulkStatePtr structure_state = deserializeDynamicStructureStatePrefix(settings, cache);
+    if (!structure_state)
+        return;
+
+    auto dynamic_state = std::make_shared<DeserializeBinaryBulkStateDynamic>();
+    dynamic_state->structure_state = structure_state;
+    dynamic_state->variant_serialization = checkAndGetState<DeserializeBinaryBulkStateDynamicStructure>(structure_state)->variant_type->getDefaultSerialization();
+
+    settings.path.push_back(Substream::DynamicData);
+    dynamic_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_state->variant_state, cache);
+    settings.path.pop_back();
+
+    state = std::move(dynamic_state);
+}
+
+ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeDynamicStructureStatePrefix(
+    DeserializeBinaryBulkSettings & settings, SubstreamsDeserializeStatesCache * cache)
+{
+    settings.path.push_back(Substream::DynamicStructure);
+
+    DeserializeBinaryBulkStatePtr state = nullptr;
+    if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path))
+    {
+        state = cached_state;
+    }
+    else if (auto * structure_stream = settings.getter(settings.path))
+    {
+        /// Read structure serialization version.
+        UInt64 structure_version;
+        readBinaryLittleEndian(structure_version, *structure_stream);
+        auto structure_state = std::make_shared<DeserializeBinaryBulkStateDynamicStructure>(structure_version);
+        /// Read internal Variant type name.
+        String data_type_name;
+        readStringBinary(data_type_name, *structure_stream);
+        structure_state->variant_type = DataTypeFactory::instance().get(data_type_name);
+        const auto * variant_type = typeid_cast<const DataTypeVariant *>(structure_state->variant_type.get());
+        if (!variant_type)
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName());
+
+        /// Read statistics.
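+        /// At this point the prefix has the following layout (a sketch):
+        ///   [UInt64, little-endian]  structure serialization version
+        ///   [size + bytes]           internal Variant type name, e.g. "Variant(Int64, String)"
+        ///   [VarUInt per variant]    sizes, present only when written with
+        ///                            DynamicStatisticsMode::PREFIX (mirrored by dynamic_read_statistics).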
+        if (settings.dynamic_read_statistics)
+        {
+            const auto & variants = variant_type->getVariants();
+            size_t variant_size;
+            for (const auto & variant : variants)
+            {
+                readVarUInt(variant_size, *structure_stream);
+                structure_state->statistics.data[variant->getName()] = variant_size;
+            }
+        }
+
+        state = structure_state;
+        addToSubstreamsDeserializeStatesCache(cache, settings.path, state);
+    }
+
+    settings.path.pop_back();
+    return state;
+}
+
+void SerializationDynamic::serializeBinaryBulkStateSuffix(
+    SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const
+{
+    auto * dynamic_state = checkAndGetState<SerializeBinaryBulkStateDynamic>(state);
+    settings.path.push_back(Substream::DynamicStructure);
+    auto * stream = settings.getter(settings.path);
+    settings.path.pop_back();
+
+    if (!stream)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state suffix");
+
+    /// Write statistics in suffix if needed.
+    if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX)
+    {
+        for (const auto & variant_name : dynamic_state->variant_names)
+            writeVarUInt(dynamic_state->statistics.data[variant_name], *stream);
+    }
+
+    settings.path.push_back(Substream::DynamicData);
+    dynamic_state->variant_serialization->serializeBinaryBulkStateSuffix(settings, dynamic_state->variant_state);
+    settings.path.pop_back();
+}
+
+void SerializationDynamic::serializeBinaryBulkWithMultipleStreams(
+    const DB::IColumn & column,
+    size_t offset,
+    size_t limit,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    const auto & column_dynamic = assert_cast<const ColumnDynamic &>(column);
+    auto * dynamic_state = checkAndGetState<SerializeBinaryBulkStateDynamic>(state);
+    const auto & variant_info = column_dynamic.getVariantInfo();
+    const auto * variant_column = &column_dynamic.getVariantColumn();
+
+    if (!variant_info.variant_type->equals(*dynamic_state->variant_type))
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName());
+
+    settings.path.push_back(Substream::DynamicData);
+    dynamic_state->variant_serialization->serializeBinaryBulkWithMultipleStreams(*variant_column, offset, limit, settings, dynamic_state->variant_state);
+    settings.path.pop_back();
+}
+
+void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams(
+    DB::ColumnPtr & column,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsCache * cache) const
+{
+    if (!state)
+        return;
+
+    auto mutable_column = column->assumeMutable();
+    auto * dynamic_state = checkAndGetState<DeserializeBinaryBulkStateDynamic>(state);
+    auto * structure_state = checkAndGetState<DeserializeBinaryBulkStateDynamicStructure>(dynamic_state->structure_state);
+
+    if (mutable_column->empty())
+        mutable_column = ColumnDynamic::create(structure_state->variant_type->createColumn(), structure_state->variant_type, max_dynamic_types, structure_state->statistics);
+
+    auto & column_dynamic = assert_cast<ColumnDynamic &>(*mutable_column);
+    const auto & variant_info = column_dynamic.getVariantInfo();
+    if (!variant_info.variant_type->equals(*structure_state->variant_type))
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic.
Expected: {}, Got: {}", structure_state->variant_type->getName(), variant_info.variant_type->getName()); + + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(column_dynamic.getVariantColumnPtr(), limit, settings, dynamic_state->variant_state, cache); + settings.path.pop_back(); + + column = std::move(mutable_column); +} + +void SerializationDynamic::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const +{ + UInt8 null_bit = field.isNull(); + writeBinary(null_bit, ostr); + if (null_bit) + return; + + auto field_type = applyVisitor(FieldToDataType(), field); + auto field_type_name = field_type->getName(); + writeVarUInt(field_type_name.size(), ostr); + writeString(field_type_name, ostr); + field_type->getDefaultSerialization()->serializeBinary(field, ostr, settings); +} + +void SerializationDynamic::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const +{ + UInt8 null_bit; + readBinary(null_bit, istr); + if (null_bit) + { + field = Null(); + return; + } + + size_t field_type_name_size; + readVarUInt(field_type_name_size, istr); + String field_type_name(field_type_name_size, 0); + istr.readStrict(field_type_name.data(), field_type_name_size); + auto field_type = DataTypeFactory::instance().get(field_type_name); + field_type->getDefaultSerialization()->deserializeBinary(field, istr, settings); +} + +void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + const auto & variant_info = dynamic_column.getVariantInfo(); + const auto & variant_column = dynamic_column.getVariantColumn(); + auto global_discr = variant_column.globalDiscriminatorAt(row_num); + + UInt8 null_bit = global_discr == ColumnVariant::NULL_DISCRIMINATOR; + writeBinary(null_bit, ostr); + if (null_bit) + return; + + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(global_discr); + const auto & variant_type_name = variant_info.variant_names[global_discr]; + writeVarUInt(variant_type_name.size(), ostr); + writeString(variant_type_name, ostr); + variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); +} + +template +static void deserializeVariant( + ColumnVariant & variant_column, + const DataTypePtr & variant_type, + ColumnVariant::Discriminator global_discr, + ReadBuffer & istr, + DeserializeFunc deserialize) +{ + auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discr); + deserialize(*variant_type->getDefaultSerialization(), variant, istr); + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discr)); + variant_column.getOffsets().push_back(variant.size() - 1); +} + +void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto & dynamic_column = assert_cast(column); + UInt8 null_bit; + readBinary(null_bit, istr); + if (null_bit) + { + dynamic_column.insertDefault(); + return; + } + + size_t variant_type_name_size; + readVarUInt(variant_type_name_size, istr); + String variant_type_name(variant_type_name_size, 0); + istr.readStrict(variant_type_name.data(), variant_type_name_size); + + const auto & variant_info = dynamic_column.getVariantInfo(); + auto it = 
variant_info.variant_name_to_discriminator.find(variant_type_name); + if (it != variant_info.variant_name_to_discriminator.end()) + { + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(it->second); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + return; + } + + /// We don't have this variant yet. Let's try to add it. + auto variant_type = DataTypeFactory::instance().get(variant_type_name); + if (dynamic_column.addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + return; + } + + /// We reached maximum number of variants and couldn't add new variant. + /// This case should be really rare in real use cases. + /// We should always be able to add String variant and insert value as String. + dynamic_column.addStringVariant(); + auto tmp_variant_column = variant_type->createColumn(); + variant_type->getDefaultSerialization()->deserializeBinary(*tmp_variant_column, istr, settings); + auto string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto & variant_column = dynamic_column.getVariantColumn(); + variant_column.insertIntoVariantFrom(variant_info.variant_name_to_discriminator.at("String"), *string_column, 0); +} + +void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextCSV(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +template +static void deserializeTextImpl( + IColumn & column, + ReadBuffer & istr, + const FormatSettings & settings, + ReadFieldFunc read_field, + FormatSettings::EscapingRule escaping_rule, + TryDeserializeVariantFunc try_deserialize_variant, + DeserializeVariant deserialize_variant) +{ + auto & dynamic_column = assert_cast(column); + auto & variant_column = dynamic_column.getVariantColumn(); + const auto & variant_info = dynamic_column.getVariantInfo(); + String field = read_field(istr); + auto field_buf = std::make_unique(field); + JSONInferenceInfo json_info; + auto variant_type = tryInferDataTypeByEscapingRule(field, settings, escaping_rule, &json_info); + if (escaping_rule == FormatSettings::EscapingRule::JSON) + transformFinalInferredJSONTypeIfNeeded(variant_type, settings, &json_info); + + if (checkIfTypeIsComplete(variant_type) && dynamic_column.addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator.at(variant_type->getName()); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, *field_buf, deserialize_variant); + return; + } + + /// We couldn't infer type or add new variant. Try to insert field into current variants. + field_buf = std::make_unique(field); + if (try_deserialize_variant(*variant_info.variant_type->getDefaultSerialization(), variant_column, *field_buf)) + return; + + /// We couldn't insert field into any existing variant, add String variant and read value as String. 
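+    /// E.g. when max_dynamic_types is exhausted and the inferred type of the field
+    /// cannot be added as a new variant, the raw field text is kept as a String value.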
+ dynamic_column.addStringVariant(); + + if (escaping_rule == FormatSettings::EscapingRule::Quoted && (field.size() < 2 || field.front() != '\'' || field.back() != '\'')) + field = "'" + field + "'"; + + field_buf = std::make_unique(field); + auto string_discr = variant_info.variant_name_to_discriminator.at("String"); + deserializeVariant(dynamic_column.getVariantColumn(), std::make_shared(), string_discr, *field_buf, deserialize_variant); +} + +void SerializationDynamic::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [&settings](ReadBuffer & buf) + { + String field; + readCSVField(field, buf, settings.csv); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextCSV(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextCSV(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::CSV, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextCSV(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextEscaped(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readEscapedString(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextEscaped(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextEscaped(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Escaped, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextEscaped(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextQuoted(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readQuotedField(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return 
serialization.tryDeserializeTextQuoted(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextQuoted(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Quoted, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextQuoted(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextJSON(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [&settings](ReadBuffer & buf) + { + String field; + readJSONField(field, buf, settings.json); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextJSON(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextJSON(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::JSON, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextJSON(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextRaw(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readString(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextRaw(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextRaw(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Raw, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextRaw(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextRaw(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + 
dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeText(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readStringUntilEOF(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeWholeText(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeWholeText(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Raw, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeWholeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeWholeText(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextXML(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +} diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h new file mode 100644 index 00000000000..4803bc25d18 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -0,0 +1,116 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class SerializationDynamicElement; + +class SerializationDynamic : public ISerialization +{ +public: + SerializationDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) + { + } + + struct DynamicStructureSerializationVersion + { + enum Value + { + VariantTypeName = 1, + }; + + Value value; + + static void checkVersion(UInt64 version); + + explicit DynamicStructureSerializationVersion(UInt64 version); + }; + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + static DeserializeBinaryBulkStatePtr deserializeDynamicStructureStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache); + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field & field, WriteBuffer & ostr, const 
FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + +private: + friend SerializationDynamicElement; + + struct DeserializeBinaryBulkStateDynamicStructure : public ISerialization::DeserializeBinaryBulkState + { + DynamicStructureSerializationVersion structure_version; + DataTypePtr variant_type; + ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ}; + + explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) : structure_version(structure_version_) {} + }; + + size_t max_dynamic_types; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp new file mode 100644 index 00000000000..386a6579519 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -0,0 +1,99 @@ +#include +#include +#include 
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+void SerializationDynamicElement::enumerateStreams(
+    DB::ISerialization::EnumerateStreamsSettings & settings,
+    const DB::ISerialization::StreamCallback & callback,
+    const DB::ISerialization::SubstreamData &) const
+{
+    settings.path.push_back(Substream::DynamicStructure);
+    callback(settings.path);
+    settings.path.pop_back();
+
+    /// We don't know if we actually have this variant in the Dynamic column,
+    /// so we cannot enumerate variant streams.
+}
+
+void SerializationDynamicElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
+{
+    throw Exception(
+        ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationDynamicElement");
+}
+
+void SerializationDynamicElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
+{
+    throw Exception(
+        ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationDynamicElement");
+}
+
+struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::DeserializeBinaryBulkState
+{
+    ISerialization::DeserializeBinaryBulkStatePtr structure_state;
+    SerializationPtr variant_serialization;
+    ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
+};
+
+void SerializationDynamicElement::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
+{
+    DeserializeBinaryBulkStatePtr structure_state = SerializationDynamic::deserializeDynamicStructureStatePrefix(settings, cache);
+    if (!structure_state)
+        return;
+
+    auto dynamic_element_state = std::make_shared<DeserializeBinaryBulkStateDynamicElement>();
+    dynamic_element_state->structure_state = std::move(structure_state);
+    /// Use the stored pointer here: `structure_state` was just moved out and is empty.
+    const auto & variant_type = checkAndGetState<SerializationDynamic::DeserializeBinaryBulkStateDynamicStructure>(dynamic_element_state->structure_state)->variant_type;
+    /// Check if we actually have required element in the Variant.
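+    /// If the requested type is not among the variants written for this part,
+    /// variant_serialization stays nullptr and deserialization below fills the
+    /// result with default values.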
+    if (auto global_discr = assert_cast<const DataTypeVariant &>(*variant_type).tryGetVariantDiscriminator(dynamic_element_name))
+    {
+        settings.path.push_back(Substream::DynamicData);
+        dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(nested_serialization, dynamic_element_name, *global_discr);
+        dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache);
+        settings.path.pop_back();
+    }
+
+    state = std::move(dynamic_element_state);
+}
+
+void SerializationDynamicElement::serializeBinaryBulkWithMultipleStreams(const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationDynamicElement");
+}
+
+void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(
+    ColumnPtr & result_column,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsCache * cache) const
+{
+    auto * dynamic_element_state = checkAndGetState<DeserializeBinaryBulkStateDynamicElement>(state);
+
+    if (dynamic_element_state->variant_serialization)
+    {
+        settings.path.push_back(Substream::DynamicData);
+        dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
+        settings.path.pop_back();
+    }
+    else
+    {
+        auto mutable_column = result_column->assumeMutable();
+        mutable_column->insertManyDefaults(limit);
+        result_column = std::move(mutable_column);
+    }
+}
+
+}
diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h
new file mode 100644
index 00000000000..9e4980e0a27
--- /dev/null
+++ b/src/DataTypes/Serializations/SerializationDynamicElement.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationWrapper.h>
+
+namespace DB
+{
+
+/// Serialization for Dynamic element when we read it as a subcolumn.
+class SerializationDynamicElement final : public SerializationWrapper
+{
+private:
+    /// To be able to deserialize a Dynamic element as a subcolumn
+    /// we need its type name and global discriminator.
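+    /// (The global discriminator is resolved from the structure prefix of each
+    /// part, since different parts may store different internal Variant types.)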
+ String dynamic_element_name; + +public: + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_) + : SerializationWrapper(nested_) + , dynamic_element_name(dynamic_element_name_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationInterval.cpp b/src/DataTypes/Serializations/SerializationInterval.cpp index 59086d8aef3..2157566895d 100644 --- a/src/DataTypes/Serializations/SerializationInterval.cpp +++ b/src/DataTypes/Serializations/SerializationInterval.cpp @@ -68,9 +68,9 @@ void SerializationInterval::deserializeBinaryBulk(IColumn & column, ReadBuffer & } void SerializationInterval::deserializeBinaryBulkStatePrefix( - DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { - dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state); + dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state, cache); } diff --git a/src/DataTypes/Serializations/SerializationInterval.h b/src/DataTypes/Serializations/SerializationInterval.h index a4e6c204e4f..368aff4f0c3 100644 --- a/src/DataTypes/Serializations/SerializationInterval.h +++ b/src/DataTypes/Serializations/SerializationInterval.h @@ -34,7 +34,10 @@ public: void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; - void deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const override; + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 9efe05042ed..802da263d89 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ 
b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -267,7 +267,8 @@ void SerializationLowCardinality::serializeBinaryBulkStateSuffix( void SerializationLowCardinality::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * /*cache*/) const { settings.path.push_back(Substream::DictionaryKeys); auto * stream = settings.getter(settings.path); diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index d2c3a95c702..aa64e956a64 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -33,7 +33,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 7b6f87baf2e..dac4fbe88e0 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -420,9 +420,10 @@ void SerializationMap::serializeBinaryBulkStateSuffix( void SerializationMap::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); } diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 3e27ef1b04a..cfcde445c1f 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -51,7 +51,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationNamed.cpp b/src/DataTypes/Serializations/SerializationNamed.cpp index 2792827e690..07f5f9ea7ed 100644 --- a/src/DataTypes/Serializations/SerializationNamed.cpp +++ b/src/DataTypes/Serializations/SerializationNamed.cpp @@ -54,10 +54,11 @@ void SerializationNamed::serializeBinaryBulkStateSuffix( void SerializationNamed::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { addToPath(settings.path); - nested_serialization->deserializeBinaryBulkStatePrefix(settings, state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNamed.h b/src/DataTypes/Serializations/SerializationNamed.h index 0633ba2ea6f..bb2161e40e6 100644 --- a/src/DataTypes/Serializations/SerializationNamed.h +++ b/src/DataTypes/Serializations/SerializationNamed.h @@ -36,7 +36,8 @@ public: void 
deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 4d31451f92d..477349f955d 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -95,10 +95,11 @@ void SerializationNullable::serializeBinaryBulkStateSuffix( void SerializationNullable::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::NullableElements); - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index 37858ccdefd..f7d2d2eadf0 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -29,7 +29,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 67bf7af7799..88244a89204 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -210,7 +210,8 @@ void SerializationObject::serializeBinaryBulkStateSuffix( template void SerializationObject::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { checkSerializationIsSupported(settings); if (state) @@ -258,7 +259,7 @@ void SerializationObject::deserializeBinaryBulkStatePrefix( } settings.path.push_back(Substream::ObjectData); - state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state); + state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state, cache); settings.path.pop_back(); state = std::move(state_object); diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index 39e1c514640..4cb7d0ab6a8 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -41,7 +41,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index 
4d7514271ad..f9228069b90 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -152,7 +152,7 @@ void SerializationSparse::enumerateStreams( const StreamCallback & callback, const SubstreamData & data) const { - const auto * column_sparse = data.column ? &assert_cast(*data.column) : nullptr; + const auto * column_sparse = data.column ? typeid_cast(data.column.get()) : nullptr; size_t column_size = column_sparse ? column_sparse->size() : 0; settings.path.push_back(Substream::SparseOffsets); @@ -242,12 +242,13 @@ void SerializationSparse::serializeBinaryBulkStateSuffix( void SerializationSparse::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto state_sparse = std::make_shared(); settings.path.push_back(Substream::SparseElements); - nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested); + nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested, cache); settings.path.pop_back(); state = std::move(state_sparse); diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h index b1ed7b613f0..a55856bacf0 100644 --- a/src/DataTypes/Serializations/SerializationSparse.h +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -43,7 +43,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; /// Allows to write ColumnSparse and other columns in sparse serialization. 
void serializeBinaryBulkWithMultipleStreams( diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 632a019d2d9..bb7c19aa78d 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -606,13 +606,14 @@ void SerializationTuple::serializeBinaryBulkStateSuffix( void SerializationTuple::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto tuple_state = std::make_shared(); tuple_state->states.resize(elems.size()); for (size_t i = 0; i < elems.size(); ++i) - elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i]); + elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i], cache); state = std::move(tuple_state); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index d9c63a05217..810673d8b21 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -53,7 +53,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 8ca86c63bf6..3fe26b773e3 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -123,7 +123,8 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto variant_state = std::make_shared(); variant_state->states.resize(variants.size()); @@ -132,7 +133,7 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i]); + variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index 3f53dcf1339..0de786f5561 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -59,7 +59,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 7d4487fe6da..4f120ecac06 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -2,6 
+2,7 @@ #include #include #include +#include namespace DB { @@ -55,12 +56,13 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; -void SerializationVariantElement::deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const +void SerializationVariantElement::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { auto variant_element_state = std::make_shared(); addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); state = std::move(variant_element_state); @@ -80,6 +82,7 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( { auto * variant_element_state = checkAndGetState(state); + size_t variant_limit = 0; /// First, deserialize discriminators from Variant column. settings.path.push_back(Substream::VariantDiscriminators); if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) @@ -96,17 +99,30 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); +// ColumnVariant::Discriminator discr; +// readBinaryLittleEndian(discr, *discriminators_stream); +// if (discr == ColumnVariant::NULL_DISCRIMINATOR) +// { SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); +// } +// else +// { +// auto & discriminators_data = assert_cast(*variant_element_state->discriminators->assumeMutable()).getData(); +// discriminators_data.resize_fill(discriminators_data.size() + limit, discr); +// } + addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } settings.path.pop_back(); - /// Iterate through new discriminators to calculate the limit for our variant. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - size_t variant_limit = 0; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); + /// Iterate through new discriminators to calculate the limit for our variant. + if (!variant_limit) + { + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); + } /// Now we know the limit for our variant and can deserialize it. 
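The hunk above hoists variant_limit so that it can later be filled from a cached or compact representation (see the commented-out branch); for now it is still computed by scanning the freshly read discriminators. A minimal standalone sketch of that scan, using plain STL types instead of ClickHouse's PaddedPODArray and an illustrative function name, not the real API:

#include <cstddef>
#include <cstdint>
#include <vector>

using Discriminator = uint8_t;

/// Given all discriminators read so far and the number of rows read by the
/// last call (`limit`), count how many of those rows belong to the requested
/// variant. That count is exactly the number of values that must then be
/// deserialized from the variant's own substream.
size_t computeVariantLimit(const std::vector<Discriminator> & discriminators, size_t limit, Discriminator variant_discr)
{
    size_t offset = discriminators.size() - limit;
    size_t variant_limit = 0;
    for (size_t i = offset; i != discriminators.size(); ++i)
        variant_limit += (discriminators[i] == variant_discr);
    return variant_limit;
}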
diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index aafecf43d39..0ce0a72e250 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -43,7 +43,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, @@ -59,12 +60,6 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; -private: - friend SerializationVariant; - - void addVariantToPath(SubstreamPath & path) const; - void removeVariantFromPath(SubstreamPath & path) const; - struct VariantSubcolumnCreator : public ISubcolumnCreator { const ColumnPtr local_discriminators; @@ -82,6 +77,11 @@ private: ColumnPtr create(const ColumnPtr & prev) const override; SerializationPtr create(const SerializationPtr & prev) const override; }; +private: + friend SerializationVariant; + + void addVariantToPath(SubstreamPath & path) const; + void removeVariantFromPath(SubstreamPath & path) const; }; } diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index bde52bb8096..ecef533d7e0 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -29,9 +29,10 @@ void SerializationWrapper::serializeBinaryBulkStateSuffix( void SerializationWrapper::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { - nested_serialization->deserializeBinaryBulkStatePrefix(settings, state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); } void SerializationWrapper::serializeBinaryBulkWithMultipleStreams( diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 6c5e2046062..882f17bba0a 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -36,7 +36,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp index fc7432d5bf6..c6337a31fce 100644 --- a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp +++ b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp @@ -49,7 +49,7 @@ TEST(SerializationObject, FromString) settings.position_independent_encoding = false; settings.getter = [&in](const auto &) { return ∈ }; - serialization->deserializeBinaryBulkStatePrefix(settings, state); + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(result_column, column_string->size(), settings, state, nullptr); } diff --git a/src/DataTypes/Utils.cpp 
b/src/DataTypes/Utils.cpp index 2f29d57d454..e7e69e379af 100644 --- a/src/DataTypes/Utils.cpp +++ b/src/DataTypes/Utils.cpp @@ -224,6 +224,7 @@ bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_typ case TypeIndex::Nothing: case TypeIndex::JSONPaths: case TypeIndex::Variant: + case TypeIndex::Dynamic: return false; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 59b3e52e139..330bc28be61 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -929,6 +929,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep query_context->setSetting("allow_experimental_hash_functions", 1); query_context->setSetting("allow_experimental_object_type", 1); query_context->setSetting("allow_experimental_variant_type", 1); + query_context->setSetting("allow_experimental_dynamic_type", 1); query_context->setSetting("allow_experimental_annoy_index", 1); query_context->setSetting("allow_experimental_usearch_index", 1); query_context->setSetting("allow_experimental_bigint_types", 1); diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 5b7995e0da2..deff44a0d9b 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -43,9 +43,9 @@ struct FormatSettings String column_names_for_schema_inference{}; String schema_inference_hints{}; - bool try_infer_integers = false; - bool try_infer_dates = false; - bool try_infer_datetimes = false; + bool try_infer_integers = true; + bool try_infer_dates = true; + bool try_infer_datetimes = true; bool try_infer_exponent_floats = false; enum class DateTimeInputFormat diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 8286b24d0a6..39915b0735e 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -93,7 +93,7 @@ void NativeReader::readData(const ISerialization & serialization, ColumnPtr & co ISerialization::DeserializeBinaryBulkStatePtr state; - serialization.deserializeBinaryBulkStatePrefix(settings, state); + serialization.deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr); if (column->size() != rows) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 7049ca44110..75f8979e727 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +64,7 @@ #include #include +#include namespace DB { @@ -1815,6 +1818,7 @@ struct ConvertImpl /// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. 
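+/// The added throw_on_error template flag selects the behaviour on malformed input: throw an exception (ordinary CAST) or insert a default value into the result column (used for accurateCastOrNull).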
+template struct ConvertImplGenericFromString { static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) @@ -1854,29 +1858,34 @@ struct ConvertImplGenericFromString { serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); } - catch (const Exception & e) + catch (const Exception &) { - auto * nullable_column = typeid_cast(&column_to); - if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) - { - auto & col_nullmap = nullable_column->getNullMapData(); - if (col_nullmap.size() != nullable_column->size()) - col_nullmap.resize_fill(nullable_column->size()); - if (nullable_column->size() == (i + 1)) - nullable_column->popBack(1); - nullable_column->insertDefault(); - continue; - } - throw; + if constexpr (throw_on_error) + throw; + /// Check if exception happened after we inserted the value + /// (deserializeWholeText should not do it, but let's check anyway). + if (column_to.size() > i) + column_to.popBack(column_to.size() - i); + column_to.insertDefault(); } + /// Usually deserializeWholeText checks for eof after parsing, but let's check one more time just in case. if (!read_buffer.eof()) { - if (result_type) - throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + if constexpr (throw_on_error) + { + if (result_type) + throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + else + throw Exception( + ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse string to column {}. Expected eof", column_to.getName()); + } else - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column {}. Expected eof", column_to.getName()); + { + if (column_to.size() > i) + column_to.popBack(column_to.size() - i); + column_to.insertDefault(); + } } } } @@ -3279,7 +3288,9 @@ private: { if (checkAndGetDataType(from_type.get())) { - return &ConvertImplGenericFromString::execute; + if (cast_type == CastType::accurateOrNull) + return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } return createWrapper(from_type, to_type, requested_result_is_nullable); @@ -3442,7 +3453,7 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { @@ -3485,7 +3496,7 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } DataTypePtr from_type_holder; @@ -3576,7 +3587,7 @@ private: /// Conversion from String through parsing. 
if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -3921,7 +3932,7 @@ private: { return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); res->finalize(); return res; }; @@ -4089,7 +4100,7 @@ private: }; } - auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); + auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(removeNullableOrLowCardinalityNullable(from_type)->getName()); if (!variant_discr_opt) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); @@ -4197,6 +4208,293 @@ private: return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); } + WrapperType createDynamicToColumnWrapper(const DataTypePtr & to_type) const + { + return [this, to_type] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments.front().column.get()); + const auto & variant_info = column_dynamic.getVariantInfo(); + auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), to_type); + ColumnsWithTypeAndName args = {ColumnWithTypeAndName(column_dynamic.getVariantColumnPtr(), variant_info.variant_type, "")}; + return variant_wrapper(args, result_type, col_nullable, input_rows_count); + }; + } + + WrapperType createStringToDynamicThroughParsingWrapper() const + { + return [&](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto column = arguments[0].column->convertToFullColumnIfLowCardinality(); + auto args = arguments; + args[0].column = column; + + const ColumnNullable * column_nullable = nullptr; + if (isColumnNullable(*args[0].column)) + { + column_nullable = assert_cast(args[0].column.get()); + args[0].column = column_nullable->getNestedColumnPtr(); + } + + args[0].type = removeNullable(removeLowCardinality(args[0].type)); + + if (cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + }; + } + + std::pair getReducedVariant( + const ColumnVariant & variant_column, + const DataTypePtr & variant_type, + const std::unordered_map & variant_name_to_discriminator, + size_t max_result_num_variants, + const ColumnDynamic::Statistics & statistics = {}) const + { + LOG_DEBUG(getLogger("FunctionsConversion"), "getReducedVariant for variant {} with size {}", variant_type->getName(), variant_column.size()); + + const auto & variant_types = assert_cast(*variant_type).getVariants(); + /// First check if we don't exceed the limit in current Variant column. 
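+ /// (Being exactly at the limit is also fine when String is already among the variants: the reduction below would only re-introduce the same String slot.)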
+ if (variant_types.size() < max_result_num_variants || (variant_types.size() == max_result_num_variants && variant_name_to_discriminator.contains("String"))) + return {variant_column.getPtr(), variant_type}; + + /// We want to keep the most frequent variants and convert to string the rarest. + std::vector> variant_sizes; + variant_sizes.reserve(variant_types.size()); + std::optional old_string_discriminator; + /// List of variants that should be converted to a single String variant. + std::vector variants_to_convert_to_string; + for (size_t i = 0; i != variant_types.size(); ++i) + { + /// String variant won't be removed. + String variant_name = variant_types[i]->getName(); + LOG_DEBUG(getLogger("FunctionsConversion"), "Variant {}/{} size: {}, statistics: {}", variant_name, i, variant_column.getVariantByGlobalDiscriminator(i).size(), statistics.data.contains(variant_name) ? toString(statistics.data.at(variant_name)) : "none"); + + if (variant_name == "String") + { + old_string_discriminator = i; + /// For simplicity, add this variant to the list that will be converted string, + /// so we will process it with other variants when constructing the new String variant. + variants_to_convert_to_string.push_back(i); + } + else + { + size_t size = 0; + if (statistics.data.empty()) + size = variant_column.getVariantByGlobalDiscriminator(i).size(); + else + size = statistics.data.at(variant_name); + variant_sizes.emplace_back(size, i); + } + } + + /// Sort variants by sizes, so we will keep the most frequent. + std::sort(variant_sizes.begin(), variant_sizes.end(), std::greater()); + + DataTypes remaining_variants; + remaining_variants.reserve(max_result_num_variants); + /// Add String variant in advance. + remaining_variants.push_back(std::make_shared()); + for (auto [_, discr] : variant_sizes) + { + if (remaining_variants.size() != max_result_num_variants) + remaining_variants.push_back(variant_types[discr]); + else + variants_to_convert_to_string.push_back(discr); + } + + auto reduced_variant = std::make_shared(remaining_variants); + const auto & new_variants = reduced_variant->getVariants(); + /// To construct reduced variant column we will need mapping from old to new discriminators. + std::vector old_to_new_discriminators_mapping; + old_to_new_discriminators_mapping.resize(variant_types.size()); + ColumnVariant::Discriminator string_variant_discriminator = 0; + for (size_t i = 0; i != new_variants.size(); ++i) + { + String variant_name = new_variants[i]->getName(); + if (variant_name == "String") + { + string_variant_discriminator = i; + for (auto discr : variants_to_convert_to_string) + old_to_new_discriminators_mapping[discr] = i; + } + else + { + auto old_discr = variant_name_to_discriminator.at(variant_name); + old_to_new_discriminators_mapping[old_discr] = i; + } + } + + /// Convert all reduced variants to String. 
+ std::unordered_map variants_converted_to_string; + variants_converted_to_string.reserve(variants_to_convert_to_string.size()); + size_t string_variant_size = 0; + for (auto discr : variants_to_convert_to_string) + { + auto string_type = std::make_shared(); + auto string_wrapper = prepareUnpackDictionaries(variant_types[discr], string_type); + LOG_DEBUG(getLogger("FunctionsConversion"), "Convert variant {} with size {} to String", variant_types[discr]->getName(), variant_column.getVariantPtrByGlobalDiscriminator(discr)->size()); + auto column_to_convert = ColumnWithTypeAndName(variant_column.getVariantPtrByGlobalDiscriminator(discr), variant_types[discr], ""); + ColumnsWithTypeAndName args = {column_to_convert}; + auto variant_string_column = string_wrapper(args, string_type, nullptr, column_to_convert.column->size()); + LOG_DEBUG(getLogger("FunctionsConversion"), "Got String column with size {}", variant_string_column->size()); + string_variant_size += variant_string_column->size(); + variants_converted_to_string[discr] = variant_string_column; + } + + /// Create new discriminators and offsets and fill new String variant according to old discriminators. + auto string_variant = ColumnString::create(); + string_variant->reserve(string_variant_size); + auto new_discriminators_column = variant_column.getLocalDiscriminatorsPtr()->cloneEmpty(); + auto & new_discriminators_data = assert_cast(*new_discriminators_column).getData(); + new_discriminators_data.reserve(variant_column.size()); + auto new_offsets = variant_column.getOffsetsPtr()->cloneEmpty(); + auto & new_offsets_data = assert_cast(*new_offsets).getData(); + new_offsets_data.reserve(variant_column.size()); + const auto & old_local_discriminators = variant_column.getLocalDiscriminators(); + const auto & old_offsets = variant_column.getOffsets(); + LOG_DEBUG(getLogger("FunctionsConversion"), "Discriminators size: {}. Offsets size: {}", old_local_discriminators.size(), old_offsets.size()); + for (size_t i = 0; i != old_local_discriminators.size(); ++i) + { + auto old_discr = variant_column.globalDiscriminatorByLocal(old_local_discriminators[i]); + LOG_DEBUG(getLogger("FunctionsConversion"), "Row {}, discriminator {}", i, UInt64(old_discr)); + + if (old_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + new_discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); + new_offsets_data.push_back(0); + continue; + } + + auto new_discr = old_to_new_discriminators_mapping[old_discr]; + new_discriminators_data.push_back(new_discr); + if (new_discr != string_variant_discriminator) + { + LOG_DEBUG(getLogger("FunctionsConversion"), "Keep variant {}", UInt64(old_discr)); + new_offsets_data.push_back(old_offsets[i]); + } + else + { + LOG_DEBUG(getLogger("FunctionsConversion"), "Get string value of variant {} with String column with size {} at offset {}", UInt64(old_discr), variants_converted_to_string[old_discr]->size(), old_offsets[i]); + new_offsets_data.push_back(string_variant->size()); + string_variant->insertFrom(*variants_converted_to_string[old_discr], old_offsets[i]); + } + } + + /// Create new list of variant columns. 
+ Columns new_variant_columns; + new_variant_columns.resize(new_variants.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + auto new_discr = old_to_new_discriminators_mapping[i]; + if (new_discr != string_variant_discriminator) + new_variant_columns[new_discr] = variant_column.getVariantPtrByGlobalDiscriminator(i); + } + new_variant_columns[string_variant_discriminator] = std::move(string_variant); + return {ColumnVariant::create(std::move(new_discriminators_column), std::move(new_offsets), new_variant_columns), reduced_variant}; + } + + WrapperType createVariantToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const + { + const auto & from_variant_type = assert_cast(*from_type); + size_t max_dynamic_types = dynamic_type.getMaxDynamicTypes(); + const auto & variants = from_variant_type.getVariants(); + std::unordered_map variant_name_to_discriminator; + variant_name_to_discriminator.reserve(variants.size()); + for (size_t i = 0; i != variants.size(); ++i) + variant_name_to_discriminator[variants[i]->getName()] = i; + + return [from_type, max_dynamic_types, variant_name_to_discriminator, this] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & variant_column = assert_cast(*arguments.front().column); + auto [reduced_variant_column, reduced_variant_type] = getReducedVariant(variant_column, from_type, variant_name_to_discriminator, max_dynamic_types); + return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, max_dynamic_types); + }; + } + + WrapperType createColumnToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const + { + if (const auto * variant_type = typeid_cast(from_type.get())) + return createVariantToDynamicWrapper(from_type, dynamic_type); + + if (dynamic_type.getMaxDynamicTypes() == 1) + { + DataTypePtr string_type = std::make_shared(); + if (from_type->isNullable()) + string_type = makeNullable(string_type); + auto string_wrapper = prepareUnpackDictionaries(from_type, string_type); + auto variant_type = std::make_shared(DataTypes{removeNullable(string_type)}); + auto variant_wrapper = createColumnToVariantWrapper(string_type, *variant_type); + return [string_wrapper, variant_wrapper, string_type, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + auto string_column = string_wrapper(arguments, string_type, col_nullable, input_rows_count); + auto column = ColumnWithTypeAndName(string_column, string_type, ""); + ColumnsWithTypeAndName args = {column}; + auto variant_column = variant_wrapper(args, variant_type, nullptr, string_column->size()); + return ColumnDynamic::create(variant_column, variant_type, max_dynamic_types); + }; + } + + if (context && context->getSettingsRef().cast_string_to_dynamic_use_inference && isStringOrFixedString(removeNullable(removeLowCardinality(from_type)))) + return createStringToDynamicThroughParsingWrapper(); + + auto variant_type = std::make_shared(DataTypes{removeNullableOrLowCardinalityNullable(from_type)}); + auto variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type); + return [variant_wrapper, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + 
auto variant_res = variant_wrapper(arguments, variant_type, col_nullable, input_rows_count); + return ColumnDynamic::create(variant_res, variant_type, max_dynamic_types); + }; + } + + WrapperType createDynamicToDynamicWrapper(const DataTypeDynamic & from_dynamic, const DataTypeDynamic & to_dynamic) const + { + size_t from_max_types = from_dynamic.getMaxDynamicTypes(); + size_t to_max_types = to_dynamic.getMaxDynamicTypes(); + if (from_max_types == to_max_types) + return createIdentityWrapper(from_dynamic.getPtr()); + + if (to_max_types > from_max_types) + { + return [to_max_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments[0].column); + return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), to_max_types); + }; + } + + return [to_max_types, this] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments[0].column); + auto [reduced_variant_column, reduced_variant_type] = getReducedVariant( + column_dynamic.getVariantColumn(), + column_dynamic.getVariantInfo().variant_type, + column_dynamic.getVariantInfo().variant_name_to_discriminator, + to_max_types, + column_dynamic.getStatistics()); + return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, to_max_types); + }; + } + + /// Wrapper for conversion to/from Dynamic type + WrapperType createDynamicWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const + { + if (const auto * from_dynamic = checkAndGetDataType(from_type.get())) + { + if (const auto * to_dynamic = checkAndGetDataType(to_type.get())) + return createDynamicToDynamicWrapper(*from_dynamic, *to_dynamic); + + return createDynamicToColumnWrapper(to_type); + } + + return createColumnToDynamicWrapper(from_type, *checkAndGetDataType(to_type.get())); + } + template WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum * to_type) const { @@ -4376,8 +4674,11 @@ private: WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const { - /// Conversion from/to Variant data type is processed in a special way. + /// Conversion from/to Variant/Dynamic data type is processed in a special way. /// We don't need to remove LowCardinality/Nullable. 
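+ /// Dynamic is dispatched first: a Dynamic column keeps its values in a nested Variant column, so its wrappers are built on top of the Variant ones.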
+ if (isDynamic(to_type) || isDynamic(from_type)) + return createDynamicWrapper(from_type, to_type); + if (isVariant(to_type) || isVariant(from_type)) return createVariantWrapper(from_type, to_type); @@ -4691,7 +4992,7 @@ private: if (to_type->getCustomSerialization() && to_type->getCustomName()) { - ret = [requested_result_is_nullable]( + ret = [requested_result_is_nullable, this]( ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, @@ -4700,7 +5001,10 @@ private: auto wrapped_result_type = result_type; if (requested_result_is_nullable) wrapped_result_type = makeNullable(result_type); - return ConvertImplGenericFromString::execute( + if (this->cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute( + arguments, wrapped_result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute( arguments, wrapped_result_type, column_nullable, input_rows_count); }; return true; diff --git a/src/Functions/dynamicElement.cpp b/src/Functions/dynamicElement.cpp new file mode 100644 index 00000000000..964c058776e --- /dev/null +++ b/src/Functions/dynamicElement.cpp @@ -0,0 +1,172 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** Extract element of Dynamic by type name. + * Also the function looks through Arrays: you can get Array of Dynamic elements from Array of Dynamic. + */ +class FunctionDynamicElement : public IFunction +{ +public: + static constexpr auto name = "dynamicElement"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const size_t number_of_arguments = arguments.size(); + + if (number_of_arguments != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), number_of_arguments); + + size_t count_arrays = 0; + const IDataType * input_type = arguments[0].type.get(); + while (const DataTypeArray * array = checkAndGetDataType(input_type)) + { + input_type = array->getNestedType().get(); + ++count_arrays; + } + + if (!isDynamic(*input_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic or Array of Dynamic.
Actual {}", + getName(), + arguments[0].type->getName()); + + auto return_type = makeNullableOrLowCardinalityNullableSafe(getRequestedElementType(arguments[1].column)); + + for (; count_arrays; --count_arrays) + return_type = std::make_shared(return_type); + + return return_type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & input_arg = arguments[0]; + const IDataType * input_type = input_arg.type.get(); + const IColumn * input_col = input_arg.column.get(); + + bool input_arg_is_const = false; + if (typeid_cast(input_col)) + { + input_col = assert_cast(input_col)->getDataColumnPtr().get(); + input_arg_is_const = true; + } + + Columns array_offsets; + while (const DataTypeArray * array_type = checkAndGetDataType(input_type)) + { + const ColumnArray * array_col = assert_cast(input_col); + + input_type = array_type->getNestedType().get(); + input_col = &array_col->getData(); + array_offsets.push_back(array_col->getOffsetsPtr()); + } + + const ColumnDynamic * input_col_as_dynamic = checkAndGetColumn(input_col); + if (!input_col_as_dynamic) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic or array of Dynamics. Actual {}", getName(), input_arg.type->getName()); + + auto element_type = getRequestedElementType(arguments[1].column); + const auto & variant_info = input_col_as_dynamic->getVariantInfo(); + auto it = variant_info.variant_name_to_discriminator.find(element_type->getName()); + if (it == variant_info.variant_name_to_discriminator.end()) + { + auto result_type = makeNullableOrLowCardinalityNullableSafe(element_type); + auto result_column = result_type->createColumn(); + result_column->insertManyDefaults(input_rows_count); + return wrapInArraysAndConstIfNeeded(std::move(result_column), array_offsets, input_arg_is_const, input_rows_count); + } + + const auto & variant_column = input_col_as_dynamic->getVariantColumn(); + auto subcolumn_creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), element_type->getName(), it->second, variant_column.localDiscriminatorByGlobal(it->second)); + auto result_column = subcolumn_creator.create(variant_column.getVariantPtrByGlobalDiscriminator(it->second)); + return wrapInArraysAndConstIfNeeded(std::move(result_column), array_offsets, input_arg_is_const, input_rows_count); + } + +private: + DataTypePtr getRequestedElementType(const ColumnPtr & type_name_column) const + { + const auto * name_col = checkAndGetColumnConst(type_name_column.get()); + if (!name_col) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of {} must be a constant String", getName()); + + String element_type_name = name_col->getValue(); + auto element_type = DataTypeFactory::instance().tryGet(element_type_name); + if (!element_type) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument of {} must be a valid type name. 
Got: {}", getName(), element_type_name); + + return element_type; + } + + ColumnPtr wrapInArraysAndConstIfNeeded(ColumnPtr res, const Columns & array_offsets, bool input_arg_is_const, size_t input_rows_count) const + { + for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it) + res = ColumnArray::create(res, *it); + + if (input_arg_is_const) + res = ColumnConst::create(res, input_rows_count); + + return res; + } +}; + +} + +REGISTER_FUNCTION(DynamicElement) +{ +// factory.registerFunction(FunctionDocumentation{ +// .description = R"( +//Extracts a column with specified type from a `Dynamic` column. +//)", +// .syntax{"dynamicElement(dynamic, type_name)"}, +// .arguments{{ +// {"dynamic", "Dynamic column"}, +// {"type_name", "The name of the variant type to extract"}}}, +// .examples{{{ +// "Example", +// R"( +//)", +// R"( +//)"}}}, +// .categories{"Dynamic"}, +// }); + + factory.registerFunction(); +} + +} diff --git a/src/Functions/dynamicType.cpp b/src/Functions/dynamicType.cpp new file mode 100644 index 00000000000..8fb2974ceff --- /dev/null +++ b/src/Functions/dynamicType.cpp @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + +/// Return enum with type name for each row in Dynamic column. +class FunctionDynamicType : public IFunction +{ +public: + static constexpr auto name = "dynamicType"; + static constexpr auto name_for_null = "None"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.empty() || arguments.size() > 1) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1", + getName(), arguments.empty()); + + if (!isDynamic(arguments[0].type.get())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic, got {} instead", + getName(), arguments[0].type->getName()); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const ColumnDynamic * dynamic_column = checkAndGetColumn(arguments[0].column.get()); + if (!dynamic_column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic, got {} instead", + getName(), arguments[0].type->getName()); + + const auto & variant_info = dynamic_column->getVariantInfo(); + const auto & variant_column = dynamic_column->getVariantColumn(); + auto res = result_type->createColumn(); + String element_type; + for (size_t i = 0; i != input_rows_count; ++i) + { + auto global_discr = variant_column.globalDiscriminatorAt(i); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + 
element_type = name_for_null; + else + element_type = variant_info.variant_names[global_discr]; + + res->insertData(element_type.data(), element_type.size()); + } + + return res; + } +}; + +} + +REGISTER_FUNCTION(DynamicType) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns the variant type name for each row of a `Dynamic` column. If the row contains NULL, it returns 'None'. +)", + .syntax = {"dynamicType(dynamic)"}, + .arguments = {{"dynamic", "Dynamic column"}}, + .examples = {{{ + "Example", + R"( +)", + R"( + +)"}}}, + .categories{"Dynamic"}, + }); +} + +} diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 4f75042ad8d..d501fa28d4b 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1157,6 +1158,11 @@ private: variant_column->applyNullMap(assert_cast(*arg_cond.column).getData()); return result_column; } + else if (auto * dynamic_column = typeid_cast(result_column.get())) + { + dynamic_column->applyNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else return ColumnNullable::create(materializeColumnIfConst(result_column), arg_cond.column); } @@ -1200,6 +1206,11 @@ private: variant_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); return result_column; } + else if (auto * dynamic_column = typeid_cast(result_column.get())) + { + dynamic_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else { size_t size = input_rows_count; diff --git a/src/Functions/isNotNull.cpp b/src/Functions/isNotNull.cpp index dd5182aeade..f0afc0d5ba3 100644 --- a/src/Functions/isNotNull.cpp +++ b/src/Functions/isNotNull.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -44,9 +45,10 @@ public: { const ColumnWithTypeAndName & elem = arguments[0]; - if (isVariant(elem.type)) + if (isVariant(elem.type) || isDynamic(elem.type)) { - const auto & discriminators = checkAndGetColumn(*elem.column)->getLocalDiscriminators(); + const auto & column_variant = isVariant(elem.type) ? assert_cast(*elem.column) : assert_cast(*elem.column).getVariantColumn(); + const auto & discriminators = column_variant.getLocalDiscriminators(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); data.resize(discriminators.size()); diff --git a/src/Functions/isNull.cpp b/src/Functions/isNull.cpp index 4bf4e44f866..7ed4fa7a813 100644 --- a/src/Functions/isNull.cpp +++ b/src/Functions/isNull.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -46,9 +47,10 @@ public: { const ColumnWithTypeAndName & elem = arguments[0]; - if (isVariant(elem.type)) + if (isVariant(elem.type) || isDynamic(elem.type)) { - const auto & discriminators = checkAndGetColumn(*elem.column)->getLocalDiscriminators(); + const auto & column_variant = isVariant(elem.type) ?
assert_cast(*elem.column) : assert_cast(*elem.column).getVariantColumn(); + const auto & discriminators = column_variant.getLocalDiscriminators(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); data.reserve(discriminators.size()); diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp index 2744a0dabb8..b57ccb6fee1 100644 --- a/src/Functions/variantElement.cpp +++ b/src/Functions/variantElement.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -116,55 +117,12 @@ public: if (!variant_global_discr.has_value()) return arguments[2].column; + auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr); const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr); - - /// If Variant has only NULLs or our variant doesn't have any real values, - /// just create column with default values and create null mask with 1. - if (input_col_as_variant->hasOnlyNulls() || variant_column->empty()) - { - auto res = variant_type->createColumn(); - - if (variant_type->lowCardinality()) - assert_cast(*res).nestedToNullable(); - - res->insertManyDefaults(input_col_as_variant->size()); - if (!variant_type->canBeInsideNullable()) - return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); - - auto null_map = ColumnUInt8::create(); - auto & null_map_data = null_map->getData(); - null_map_data.resize_fill(input_col_as_variant->size(), 1); - return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(res), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); - } - - /// If we extract single non-empty column and have no NULLs, then just return this variant. - if (auto non_empty_local_discr = input_col_as_variant->getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// If we were trying to extract some other variant, - /// it would be empty and we would already processed this case above. 
- chassert(input_col_as_variant->globalDiscriminatorByLocal(*non_empty_local_discr) == variant_global_discr); - return wrapInArraysAndConstIfNeeded(makeNullableOrLowCardinalityNullableSafe(variant_column), array_offsets, input_arg_is_const, input_rows_count); - } - - /// In general case we should calculate null-mask for variant - /// according to the discriminators column and expand - /// variant column by this mask to get a full column (with default values on NULLs) - const auto & local_discriminators = input_col_as_variant->getLocalDiscriminators(); - auto null_map = ColumnUInt8::create(); - auto & null_map_data = null_map->getData(); - null_map_data.reserve(local_discriminators.size()); - auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); - for (auto local_discr : local_discriminators) - null_map_data.push_back(local_discr != variant_local_discr); - - auto expanded_variant_column = IColumn::mutate(variant_column); - if (variant_type->lowCardinality()) - expanded_variant_column = assert_cast(*expanded_variant_column).cloneNullable(); - expanded_variant_column->expand(null_map_data, /*inverted = */ true); - if (variant_type->canBeInsideNullable()) - return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(expanded_variant_column), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); - return wrapInArraysAndConstIfNeeded(std::move(expanded_variant_column), array_offsets, input_arg_is_const, input_rows_count); + auto subcolumn_creator = SerializationVariantElement::VariantSubcolumnCreator(input_col_as_variant->getLocalDiscriminatorsPtr(), variant_type->getName(), *variant_global_discr, variant_local_discr); + auto res = subcolumn_creator.create(variant_column); + return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); } private: std::optional getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7c3bed7388c..739d0f17078 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1496,7 +1496,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, validateVirtualColumns(*res); - if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + if (!res->supportsDynamicSubcolumnsDeprecated() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column of type Object, " diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fc58f7b5098..a1cede5ae95 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -554,7 +554,7 @@ BlockIO InterpreterInsertQuery::execute() { /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. 
- if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && !isDynamic(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); } } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 5588fc55a64..351189f70ae 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -2,7 +2,7 @@ #include #include -#include +//#include #include #include @@ -1188,6 +1188,38 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } + if (!unknown_required_source_columns.empty()) + { + + for (const NameAndTypePair & pair : source_columns_ordinary) + { +// std::cerr << "Check ordinary column " << pair.name << "\n"; + if (!pair.type->hasDynamicSubcolumns()) + continue; + +// std::cerr << "Check dynamic subcolumns\n"; + + for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) + { + auto [column_name, dynamic_subcolumn_name] = Nested::splitName(*it); +// std::cerr << "Check dynamic subcolumn " << dynamic_subcolumn_name << "\n"; + + if (column_name == pair.name) + { + if (auto dynamic_subcolumn_type = pair.type->tryGetSubcolumnType(dynamic_subcolumn_name)) + { +// std::cerr << "Found\n"; + source_columns.emplace_back(*it, dynamic_subcolumn_type); + it = unknown_required_source_columns.erase(it); + continue; + } + } + + ++it; + } + } + } + if (!unknown_required_source_columns.empty()) { constexpr auto format_string = "Missing columns: {} while processing query: '{}', required columns:{}{}"; diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 25085ff4823..30b7de409f1 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -26,6 +27,7 @@ #include #include #include +#include namespace DB @@ -165,6 +167,8 @@ Field convertDecimalType(const Field & from, const To & type) Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint) { + checkStackSize(); + if (from_type_hint && from_type_hint->equals(type)) { return src; @@ -504,7 +508,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID else if (const DataTypeVariant * type_variant = typeid_cast(&type)) { /// If we have type hint and Variant contains such type, no need to convert field. - if (from_type_hint && type_variant->tryGetVariantDiscriminator(*from_type_hint)) + if (from_type_hint && type_variant->tryGetVariantDiscriminator(from_type_hint->getName())) return src; /// Create temporary column and check if we can insert this field to the variant. @@ -513,6 +517,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (col->tryInsert(src)) return src; } + else if (isDynamic(type)) + { + /// We can insert any field into a Dynamic column.
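+ /// (no conversion is needed: the Dynamic column accepts the field as-is on insert)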
+        return src;
+    }
 
     /// Conversion from string by parsing.
     if (src.getType() == Field::Types::String)
diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp
index 27c364073ae..3529863a623 100644
--- a/src/Interpreters/parseColumnsListForTableFunction.cpp
+++ b/src/Interpreters/parseColumnsListForTableFunction.cpp
@@ -40,7 +40,7 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio
 
     if (!settings.allow_experimental_object_type)
     {
-        if (data_type.hasDynamicSubcolumns())
+        if (data_type.hasDynamicSubcolumnsDeprecated())
         {
             throw Exception(
                 ErrorCodes::ILLEGAL_COLUMN,
@@ -107,6 +107,18 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio
             }
         }
     }
+
+    if (!settings.allow_experimental_dynamic_type)
+    {
+        if (data_type.hasDynamicSubcolumns())
+        {
+            throw Exception(
+                ErrorCodes::ILLEGAL_COLUMN,
+                "Cannot create column with type '{}' because experimental Dynamic type is not allowed. "
+                "Set setting allow_experimental_dynamic_type = 1 in order to allow it",
+                data_type.getName());
+        }
+    }
 };
 
 validate_callback(*type_to_check);
diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h
index ffb59bfa457..e2d2bc97ff7 100644
--- a/src/Interpreters/parseColumnsListForTableFunction.h
+++ b/src/Interpreters/parseColumnsListForTableFunction.h
@@ -21,6 +21,7 @@ struct DataTypeValidationSettings
         , allow_experimental_variant_type(settings.allow_experimental_variant_type)
         , allow_suspicious_variant_types(settings.allow_suspicious_variant_types)
         , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types)
+        , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type)
     {
     }
 
@@ -30,6 +31,7 @@
     bool allow_experimental_variant_type = true;
     bool allow_suspicious_variant_types = true;
     bool validate_nested_types = true;
+    bool allow_experimental_dynamic_type = true;
 };
 
 void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings);
diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp
index fcf189e51f4..747a9a6f7ba 100644
--- a/src/Parsers/ParserDataType.cpp
+++ b/src/Parsers/ParserDataType.cpp
@@ -1,18 +1,47 @@
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-
 namespace DB
 {
 
 namespace
 {
 
+class DynamicArgumentsParser : public IParserBase
+{
+private:
+    const char * getName() const override { return "Dynamic data type optional argument"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
+    {
+        ASTPtr identifier;
+        ParserIdentifier identifier_parser;
+        if (!identifier_parser.parse(pos, identifier, expected))
+            return false;
+
+        if (pos->type != TokenType::Equals)
+        {
+            expected.add(pos, "equals operator");
+            return false;
+        }
+
+        ++pos;
+
+        ASTPtr number;
+        ParserNumber number_parser;
+        if (!number_parser.parse(pos, number, expected))
+            return false;
+
+        node = makeASTFunction("equals", identifier, number);
+        return true;
+    }
+};
+
 /// Wrapper to allow mixed lists of nested and normal types.
 /// Parameters are either:
 /// - Nested table elements;
@@ -21,10 +50,21 @@ namespace
 /// - another data type (or identifier)
 class ParserDataTypeArgument : public IParserBase
 {
+public:
+    ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_)
+    {
+    }
+
 private:
     const char * getName() const override { return "data type argument"; }
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
     {
+        if (type_name == "Dynamic")
+        {
+            DynamicArgumentsParser parser;
+            return parser.parse(pos, node, expected);
+        }
+
         ParserNestedTable nested_parser;
         ParserDataType data_type_parser;
         ParserAllCollectionsOfLiterals literal_parser(false);
@@ -39,6 +79,8 @@ private:
             || literal_parser.parse(pos, node, expected)
             || data_type_parser.parse(pos, node, expected);
     }
+
+    std::string_view type_name;
 };
 
 }
@@ -140,7 +182,7 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ++pos;
 
     /// Parse optional parameters
-    ParserList args_parser(std::make_unique(), std::make_unique(TokenType::Comma));
+    ParserList args_parser(std::make_unique(type_name), std::make_unique(TokenType::Comma));
     ASTPtr expr_list_args;
 
     if (!args_parser.parse(pos, expr_list_args, expected))
diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h
index cae2ab7691e..9996bedb20e 100644
--- a/src/Processors/Formats/IOutputFormat.h
+++ b/src/Processors/Formats/IOutputFormat.h
@@ -105,6 +105,8 @@ public:
         }
     }
 
+    virtual void finalizeBuffers() {}
+
 protected:
     friend class ParallelFormattingOutputFormat;
 
@@ -122,7 +124,6 @@ protected:
     virtual void consumeTotals(Chunk) {}
     virtual void consumeExtremes(Chunk) {}
     virtual void finalizeImpl() {}
-    virtual void finalizeBuffers() {}
     virtual void writePrefix() {}
     virtual void writeSuffix() {}
     virtual void resetFormatterImpl() {}
diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
index 3bd0b532d90..857f5040b79 100644
--- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
@@ -70,25 +70,6 @@ static AggregatingSortedAlgorithm::ColumnsDefinition defineColumns(
     return def;
 }
 
-static MutableColumns getMergedColumns(const Block & header, const AggregatingSortedAlgorithm::ColumnsDefinition & def)
-{
-    MutableColumns columns;
-    columns.resize(header.columns());
-
-    for (const auto & desc : def.columns_to_simple_aggregate)
-    {
-        const auto & type = desc.nested_type ? desc.nested_type
-                                             : desc.real_type;
-        columns[desc.column_number] = type->createColumn();
-    }
-
-    for (size_t i = 0; i < columns.size(); ++i)
-        if (!columns[i])
-            columns[i] = header.getByPosition(i).type->createColumn();
-
-    return columns;
-}
-
 /// Remove constants and LowCardinality for SimpleAggregateFunction
 static void preprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::ColumnsDefinition & def)
 {
@@ -159,12 +140,24 @@ AggregatingSortedAlgorithm::SimpleAggregateDescription::~SimpleAggregateDescript
 
 
 AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData(
-    MutableColumns columns_,
     UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_)
-    : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_), def(def_)
+    : MergedData(false, max_block_size_rows_, max_block_size_bytes_), def(def_)
 {
+}
+
+void AggregatingSortedAlgorithm::AggregatingMergedData::initialize(const DB::Block & header, const IMergingAlgorithm::Inputs & inputs)
+{
+    MergedData::initialize(header, inputs);
+
+    for (const auto & desc : def.columns_to_simple_aggregate)
+    {
+        const auto & type = desc.nested_type ? desc.nested_type
                                              : desc.real_type;
+        columns[desc.column_number] = type->createColumn();
+    }
+
     initAggregateDescription();
 
     /// Just to make startGroup() simpler.
@@ -267,12 +260,14 @@ AggregatingSortedAlgorithm::AggregatingSortedAlgorithm(
     size_t max_block_size_bytes_)
     : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, description_)
     , columns_definition(defineColumns(header_, description_))
-    , merged_data(getMergedColumns(header_, columns_definition), max_block_size_rows_, max_block_size_bytes_, columns_definition)
+    , merged_data(max_block_size_rows_, max_block_size_bytes_, columns_definition)
 {
 }
 
 void AggregatingSortedAlgorithm::initialize(Inputs inputs)
 {
+    merged_data.initialize(header, inputs);
+
     for (auto & input : inputs)
         if (input.chunk)
             preprocessChunk(input.chunk, columns_definition);
diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h
index aa221573151..9ab800058b1 100644
--- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h
@@ -101,11 +101,12 @@ private:
     public:
         AggregatingMergedData(
-            MutableColumns columns_,
             UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_);
 
+        void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) override;
+
         /// Group is a group of rows with the same sorting key. It represents single row in result.
         /// Algorithm is: start group, add several rows, finish group.
         /// Then pull chunk when enough groups were added.
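A note before the algorithm-by-algorithm diffs that follow: the same refactoring recurs in every merging algorithm below (Collapsing, GraphiteRollup, Replacing, Summing, VersionedCollapsing) and in ColumnGathererStream. Result columns are no longer cloned from the header in the constructor but created in initialize(), once the source chunks are known, because a column with dynamic structure (such as Dynamic) has to adopt its layout from the actual source columns first. A minimal caller-side sketch, with a hypothetical SomeSortedAlgorithm standing in for the concrete classes:

    /// Sketch only: each algorithm now forwards its inputs to MergedData before using
    /// them, so that columns with dynamic structure can be built from the sources.
    void SomeSortedAlgorithm::initialize(Inputs inputs)
    {
        merged_data.initialize(header, inputs);

        for (auto & input : inputs)
            if (input.chunk)
                preprocessChunk(input.chunk, columns_definition);
    }

The actual column construction lives in MergedData::initialize() further down, which collects the i-th column of every input chunk and calls takeDynamicStructureFromSourceColumns() where needed.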
diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp
index 8948cee217c..f5e4c88fcd0 100644
--- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp
@@ -31,8 +31,7 @@ CollapsingSortedAlgorithm::CollapsingSortedAlgorithm(
     LoggerPtr log_,
     WriteBuffer * out_row_sources_buf_,
     bool use_average_block_sizes)
-    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs)
-    , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)
+    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs, std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_))
     , sign_column_number(header_.getPositionByName(sign_column))
     , only_positive_sign(only_positive_sign_)
     , log(log_)
@@ -65,7 +64,7 @@ void CollapsingSortedAlgorithm::reportIncorrectData()
 
 void CollapsingSortedAlgorithm::insertRow(RowRef & row)
 {
-    merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows());
+    merged_data->insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows());
 }
 
 std::optional CollapsingSortedAlgorithm::insertRows()
@@ -90,8 +89,8 @@ std::optional CollapsingSortedAlgorithm::insertRows()
 
     if (count_positive >= count_negative)
     {
-        if (merged_data.hasEnoughRows())
-            res = merged_data.pull();
+        if (merged_data->hasEnoughRows())
+            res = merged_data->pull();
 
         insertRow(last_positive_row);
@@ -121,8 +120,8 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge()
 {
     /// Rare case, which may happen when index_granularity is 1, but we needed to insert 2 rows inside insertRows().
-    if (merged_data.hasEnoughRows())
-        return Status(merged_data.pull());
+    if (merged_data->hasEnoughRows())
+        return Status(merged_data->pull());
 
     /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size`
     while (queue.isValid())
@@ -148,8 +147,8 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge()
         if (key_differs)
         {
             /// if there are enough rows and the last one is calculated completely
-            if (merged_data.hasEnoughRows())
-                return Status(merged_data.pull());
+            if (merged_data->hasEnoughRows())
+                return Status(merged_data->pull());
 
             /// We write data for the previous primary key.
             auto res = insertRows();
@@ -220,7 +219,7 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge()
             return Status(std::move(*res));
     }
 
-    return Status(merged_data.pull(), true);
+    return Status(merged_data->pull(), true);
 }
 
 }
diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h
index be1a3a3bf33..99fd95d82d9 100644
--- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h
@@ -42,8 +42,6 @@ public:
     Status merge() override;
 
 private:
-    MergedData merged_data;
-
     const size_t sign_column_number;
     const bool only_positive_sign;
diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp
index 814625d7aee..2b891592b20 100644
--- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp
@@ -46,8 +46,8 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm(
     size_t max_block_size_bytes_,
     Graphite::Params params_,
     time_t time_of_merge_)
-    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs)
-    , merged_data(header_.cloneEmptyColumns(), false, max_block_size_rows_, max_block_size_bytes_)
+    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs, std::make_unique(false, max_block_size_rows_, max_block_size_bytes_))
+    , graphite_rollup_merged_data(assert_cast(*merged_data))
     , params(std::move(params_))
     , time_of_merge(time_of_merge_)
 {
@@ -63,7 +63,7 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm(
         }
     }
 
-    merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state);
+    graphite_rollup_merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state);
     columns_definition = defineColumns(header_, params);
 }
@@ -113,7 +113,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge()
 
     const DateLUTImpl & date_lut = timezone ? timezone->getTimeZone() : DateLUT::instance();
 
-    /// Take rows in needed order and put them into `merged_data` until we get `max_block_size` rows.
+    /// Take rows in needed order and put them into `graphite_rollup_merged_data` until we get `max_block_size` rows.
     ///
     /// Variables starting with current_* refer to the rows previously popped from the queue that will
     /// contribute towards current output row.
@@ -142,10 +142,10 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge()
         if (is_new_key)
         {
             /// Accumulate the row that has maximum version in the previous group of rows with the same key:
-            if (merged_data.wasGroupStarted())
+            if (graphite_rollup_merged_data.wasGroupStarted())
                 accumulateRow(current_subgroup_newest_row);
 
-            Graphite::RollupRule next_rule = merged_data.currentRule();
+            Graphite::RollupRule next_rule = graphite_rollup_merged_data.currentRule();
             if (new_path)
                 next_rule = selectPatternForPath(this->params, next_path);
@@ -167,15 +167,15 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge()
             if (will_be_new_key)
             {
-                if (merged_data.wasGroupStarted())
+                if (graphite_rollup_merged_data.wasGroupStarted())
                 {
                     finishCurrentGroup();
 
                     /// We have enough rows - return, but don't advance the loop. At the beginning of the
                     /// next call to merge() the same next_cursor will be processed once more and
                     /// the next output row will be created from it.
-                    if (merged_data.hasEnoughRows())
-                        return Status(merged_data.pull());
+                    if (graphite_rollup_merged_data.hasEnoughRows())
+                        return Status(graphite_rollup_merged_data.pull());
                 }
 
                 /// At this point previous row has been fully processed, so we can advance the loop
@@ -218,28 +218,28 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge()
     }
 
     /// Write result row for the last group.
-    if (merged_data.wasGroupStarted())
+    if (graphite_rollup_merged_data.wasGroupStarted())
     {
         accumulateRow(current_subgroup_newest_row);
         finishCurrentGroup();
     }
 
-    return Status(merged_data.pull(), true);
+    return Status(graphite_rollup_merged_data.pull(), true);
 }
 
 void GraphiteRollupSortedAlgorithm::startNextGroup(SortCursor & cursor, Graphite::RollupRule next_rule)
 {
-    merged_data.startNextGroup(cursor->all_columns, cursor->getRow(), next_rule, columns_definition);
+    graphite_rollup_merged_data.startNextGroup(cursor->all_columns, cursor->getRow(), next_rule, columns_definition);
 }
 
 void GraphiteRollupSortedAlgorithm::finishCurrentGroup()
 {
-    merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition);
+    graphite_rollup_merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition);
 }
 
 void GraphiteRollupSortedAlgorithm::accumulateRow(RowRef & row)
 {
-    merged_data.accumulateRow(row, columns_definition);
+    graphite_rollup_merged_data.accumulateRow(row, columns_definition);
 }
 
 void GraphiteRollupSortedAlgorithm::GraphiteRollupMergedData::startNextGroup(
diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h
index a20a6eaf11f..aaa3859efb6 100644
--- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h
@@ -53,7 +53,7 @@ public:
     {
     public:
         using MergedData::MergedData;
-        ~GraphiteRollupMergedData();
+        ~GraphiteRollupMergedData() override;
 
         void startNextGroup(const ColumnRawPtrs & raw_columns, size_t row,
                             Graphite::RollupRule next_rule, ColumnsDefinition & def);
@@ -72,7 +72,7 @@ public:
     };
 
 private:
-    GraphiteRollupMergedData merged_data;
+    GraphiteRollupMergedData & graphite_rollup_merged_data;
 
     const Graphite::Params params;
     ColumnsDefinition columns_definition;
diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h
index b8e73aec0dc..cf4b8589441 100644
--- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h
+++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h
@@ -34,9 +34,9 @@ protected:
         return !lhs.hasEqualSortColumnsWith(rhs);
     }
 
-private:
     Block header;
 
+private:
     /// Inputs currently being merged.
     Inputs current_inputs;
     SortCursorImpls cursors;
diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp
index c8b69382e89..fe5186736b5 100644
--- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp
+++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp
@@ -5,7 +5,7 @@ namespace DB
 {
 
 IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks(
-    Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs)
+    Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs, std::unique_ptr merged_data_)
     : header(std::move(header_))
     , description(std::move(description_))
     , chunk_allocator(num_inputs + max_row_refs)
@@ -13,6 +13,7 @@ IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks(
     , sources(num_inputs)
     , sources_origin_merge_tree_part_level(num_inputs)
     , out_row_sources_buf(out_row_sources_buf_)
+    , merged_data(std::move(merged_data_))
 {
 }
 
@@ -28,6 +29,8 @@ static void prepareChunk(Chunk & chunk)
 
 void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs)
 {
+    merged_data->initialize(header, inputs);
+
     for (size_t source_num = 0; source_num < inputs.size(); ++source_num)
     {
         if (!inputs[source_num].chunk)
diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h
index 3b4f9e92c5d..bc1aafe93f7 100644
--- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h
+++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h
@@ -1,6 +1,7 @@
 #pragma once
 #include
 #include
+#include
 #include
 
 namespace DB
@@ -10,7 +11,7 @@ class IMergingAlgorithmWithSharedChunks : public IMergingAlgorithm
 {
 public:
     IMergingAlgorithmWithSharedChunks(
-        Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs);
+        Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs, std::unique_ptr merged_data_);
 
     void initialize(Inputs inputs) override;
     void consume(Input & input, size_t source_num) override;
@@ -25,7 +26,6 @@ private:
     SortCursorImpls cursors;
 
 protected:
-
     struct Source
     {
         detail::SharedChunkPtr chunk;
@@ -43,6 +43,8 @@ protected:
     /// If it is not nullptr then it should be populated during execution
     WriteBuffer * out_row_sources_buf = nullptr;
 
+    std::unique_ptr merged_data;
+
     using RowRef = detail::RowRefWithOwnedChunk;
     void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, sources[cursor.impl->order].chunk); }
     bool skipLastRowFor(size_t input_number) const { return sources[input_number].skip_last_row; }
diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h
index 7ffde835ad0..95f915e4478 100644
--- a/src/Processors/Merges/Algorithms/MergedData.h
+++ b/src/Processors/Merges/Algorithms/MergedData.h
@@ -1,7 +1,9 @@
 #pragma once
 
 #include
+#include
 #include
+#include
 #include
 #include
 
@@ -19,17 +21,40 @@ namespace ErrorCodes
 class MergedData
 {
 public:
-    explicit MergedData(MutableColumns columns_, bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_)
-        : columns(std::move(columns_)), max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), use_average_block_size(use_average_block_size_)
+    explicit MergedData(bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_)
+        : max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), use_average_block_size(use_average_block_size_)
     {
     }
 
+    virtual void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs)
+    {
+        columns = header.cloneEmptyColumns();
+        std::vector source_columns;
+        source_columns.resize(columns.size());
+        for (const auto & input : inputs)
+        {
+            if (!input.chunk)
+                continue;
+
+            const auto & input_columns = input.chunk.getColumns();
+            for (size_t i = 0; i != input_columns.size(); ++i)
+                source_columns[i].push_back(input_columns[i]);
+        }
+
+        for (size_t i = 0; i != columns.size(); ++i)
+        {
+            if (columns[i]->hasDynamicStructure())
+                columns[i]->takeDynamicStructureFromSourceColumns(source_columns[i]);
+        }
+    }
+
     /// Pull will be called at next prepare call.
     void flush() { need_flush = true; }
 
     void insertRow(const ColumnRawPtrs & raw_columns, size_t row, size_t block_size)
     {
         size_t num_columns = raw_columns.size();
+        chassert(columns.size() == num_columns);
         for (size_t i = 0; i < num_columns; ++i)
             columns[i]->insertFrom(*raw_columns[i], row);
 
@@ -41,6 +66,7 @@ public:
     void insertRows(const ColumnRawPtrs & raw_columns, size_t start_index, size_t length, size_t block_size)
     {
         size_t num_columns = raw_columns.size();
+        chassert(columns.size() == num_columns);
         for (size_t i = 0; i < num_columns; ++i)
         {
             if (length == 1)
@@ -61,6 +87,7 @@ public:
 
         UInt64 num_rows = chunk.getNumRows();
         UInt64 num_columns = chunk.getNumColumns();
+        chassert(columns.size() == num_columns);
         auto chunk_columns = chunk.mutateColumns();
 
         /// Here is a special code for constant columns.
@@ -69,9 +96,18 @@ public:
         for (size_t i = 0; i < num_columns; ++i)
         {
             if (isColumnConst(*columns[i]))
+            {
                 columns[i] = columns[i]->cloneResized(num_rows);
+            }
+            else if (columns[i]->hasDynamicStructure())
+            {
+                columns[i] = columns[i]->cloneEmpty();
+                columns[i]->insertRangeFrom(*chunk_columns[i], 0, num_rows);
+            }
             else
+            {
                 columns[i] = std::move(chunk_columns[i]);
+            }
         }
 
         if (rows_size < num_rows)
@@ -144,6 +180,8 @@ public:
     UInt64 totalAllocatedBytes() const { return total_allocated_bytes; }
     UInt64 maxBlockSize() const { return max_block_size; }
 
+    virtual ~MergedData() = default;
+
 protected:
     MutableColumns columns;
diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp
index 1debfcec8e0..75a6ddec682 100644
--- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp
@@ -18,7 +18,7 @@ MergingSortedAlgorithm::MergingSortedAlgorithm(
     WriteBuffer * out_row_sources_buf_,
     bool use_average_block_sizes)
     : header(std::move(header_))
-    , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size_, max_block_size_bytes_)
+    , merged_data(use_average_block_sizes, max_block_size_, max_block_size_bytes_)
     , description(description_)
     , limit(limit_)
     , out_row_sources_buf(out_row_sources_buf_)
@@ -59,6 +59,7 @@ static void prepareChunk(Chunk & chunk)
 
 void MergingSortedAlgorithm::initialize(Inputs inputs)
 {
+    merged_data.initialize(header, inputs);
     current_inputs = std::move(inputs);
 
     for (size_t source_num = 0; source_num < current_inputs.size(); ++source_num)
diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
index 9e5c1249c4e..7b2c7d82a01 100644
--- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
@@ -41,9 +41,8 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm(
     bool use_average_block_sizes,
     bool cleanup_,
     bool enable_vertical_final_)
-    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs)
-    , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes), cleanup(cleanup_)
-    , enable_vertical_final(enable_vertical_final_)
+    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs, std::make_unique(use_average_block_sizes, max_block_size_rows, max_block_size_bytes))
+    , cleanup(cleanup_), enable_vertical_final(enable_vertical_final_)
 {
     if (!is_deleted_column.empty())
         is_deleted_column_number = header_.getPositionByName(is_deleted_column);
@@ -75,7 +74,7 @@ void ReplacingSortedAlgorithm::insertRow()
         to_be_emitted.push(std::move(selected_row.owned_chunk));
     }
     else
-        merged_data.insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows());
+        merged_data->insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows());
 
     selected_row.clear();
 }
@@ -109,8 +108,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()
         if (key_differs)
         {
             /// If there are enough rows and the last one is calculated completely
-            if (merged_data.hasEnoughRows())
-                return Status(merged_data.pull());
+            if (merged_data->hasEnoughRows())
+                return Status(merged_data->pull());
 
             /// Write the data for the previous primary key.
             if (!selected_row.empty())
@@ -168,8 +167,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()
     }
 
     /// If have enough rows, return block, because it prohibited to overflow requested number of rows.
-    if (merged_data.hasEnoughRows())
-        return Status(merged_data.pull());
+    if (merged_data->hasEnoughRows())
+        return Status(merged_data->pull());
 
     /// We will write the data for the last primary key.
     if (!selected_row.empty())
@@ -193,7 +192,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()
         return emitChunk(chunk, to_be_emitted.empty());
     }
 
-    return Status(merged_data.pull(), true);
+    return Status(merged_data->pull(), true);
 }
 
 void ReplacingSortedAlgorithm::saveChunkForSkippingFinalFromSelectedRow()
diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h
index 2fbd73c9072..a3ccccf0845 100644
--- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h
@@ -44,8 +44,6 @@ public:
     Status merge() override;
 
 private:
-    MergedData merged_data;
-
     ssize_t is_deleted_column_number = -1;
     ssize_t version_column_number = -1;
     bool cleanup = false;
diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
index 28160b18269..49a417e7df2 100644
--- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
@@ -382,39 +382,6 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
     return def;
 }
 
-static MutableColumns getMergedDataColumns(
-    const Block & header,
-    const SummingSortedAlgorithm::ColumnsDefinition & def)
-{
-    MutableColumns columns;
-    size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size();
-    columns.reserve(num_columns);
-
-    for (const auto & desc : def.columns_to_aggregate)
-    {
-        // Wrap aggregated columns in a tuple to match function signature
-        if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType()))
-        {
-            size_t tuple_size = desc.column_numbers.size();
-            MutableColumns tuple_columns(tuple_size);
-            for (size_t i = 0; i < tuple_size; ++i)
-                tuple_columns[i] = header.safeGetByPosition(desc.column_numbers[i]).column->cloneEmpty();
-
-            columns.emplace_back(ColumnTuple::create(std::move(tuple_columns)));
-        }
-        else
-        {
-            const auto & type = desc.nested_type ? desc.nested_type : desc.real_type;
-            columns.emplace_back(type->createColumn());
-        }
-    }
-
-    for (const auto & column_number : def.column_numbers_not_to_aggregate)
-        columns.emplace_back(header.safeGetByPosition(column_number).type->createColumn());
-
-    return columns;
-}
-
 static void preprocessChunk(Chunk & chunk, const SummingSortedAlgorithm::ColumnsDefinition & def)
 {
     auto num_rows = chunk.getNumRows();
@@ -504,11 +471,44 @@ static void setRow(Row & row, const ColumnRawPtrs & raw_columns, size_t row_num,
 }
 
 
-SummingSortedAlgorithm::SummingMergedData::SummingMergedData(
-    MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_)
-    : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_)
+SummingSortedAlgorithm::SummingMergedData::SummingMergedData(UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_)
+    : MergedData(false, max_block_size_rows_, max_block_size_bytes_)
     , def(def_)
 {
+}
+
+void SummingSortedAlgorithm::SummingMergedData::initialize(const DB::Block & header, const IMergingAlgorithm::Inputs & inputs)
+{
+    MergedData::initialize(header, inputs);
+
+    MutableColumns new_columns;
+    size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size();
+    new_columns.reserve(num_columns);
+
+    for (const auto & desc : def.columns_to_aggregate)
+    {
+        // Wrap aggregated columns in a tuple to match function signature
+        if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType()))
+        {
+            size_t tuple_size = desc.column_numbers.size();
+            MutableColumns tuple_columns(tuple_size);
+            for (size_t i = 0; i < tuple_size; ++i)
+                tuple_columns[i] = std::move(columns[desc.column_numbers[i]]);
+
+            new_columns.emplace_back(ColumnTuple::create(std::move(tuple_columns)));
+        }
+        else
+        {
+            const auto & type = desc.nested_type ? desc.nested_type : desc.real_type;
+            new_columns.emplace_back(type->createColumn());
+        }
+    }
+
+    for (const auto & column_number : def.column_numbers_not_to_aggregate)
+        new_columns.emplace_back(std::move(columns[column_number]));
+
+    columns = std::move(new_columns);
+
     current_row.resize(def.column_names.size());
 
     initAggregateDescription();
@@ -698,12 +698,14 @@ SummingSortedAlgorithm::SummingSortedAlgorithm(
     size_t max_block_size_bytes)
     : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, std::move(description_))
     , columns_definition(defineColumns(header_, description, column_names_to_sum, partition_key_columns))
-    , merged_data(getMergedDataColumns(header_, columns_definition), max_block_size_rows, max_block_size_bytes, columns_definition)
+    , merged_data(max_block_size_rows, max_block_size_bytes, columns_definition)
 {
 }
 
 void SummingSortedAlgorithm::initialize(Inputs inputs)
 {
+    merged_data.initialize(header, inputs);
+
     for (auto & input : inputs)
         if (input.chunk)
             preprocessChunk(input.chunk, columns_definition);
diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h
index dbbe4e53a5f..664b171c4b9 100644
--- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h
@@ -65,7 +65,9 @@ public:
         using MergedData::insertRow;
 
     public:
-        SummingMergedData(MutableColumns columns_, UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_);
+        SummingMergedData(UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_);
+
+        void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) override;
 
         void startGroup(ColumnRawPtrs & raw_columns, size_t row);
         void finishGroup();
diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp
index e7a431dc1d0..9f124c6ba18 100644
--- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp
@@ -16,8 +16,7 @@ VersionedCollapsingAlgorithm::VersionedCollapsingAlgorithm(
     size_t max_block_size_bytes_,
     WriteBuffer * out_row_sources_buf_,
     bool use_average_block_sizes)
-    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE)
-    , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)
+    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE, std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_))
     /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer. 3 is a reasonable minimum size to collapse anything.
     , max_rows_in_queue(std::min(std::max(3, max_block_size_rows_), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 1)
     , current_keys(max_rows_in_queue)
 {
@@ -47,7 +46,7 @@ void VersionedCollapsingAlgorithm::insertGap(size_t gap_size)
 
 void VersionedCollapsingAlgorithm::insertRow(size_t skip_rows, const RowRef & row)
 {
-    merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows());
+    merged_data->insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows());
 
     insertGap(skip_rows);
 
@@ -104,8 +103,8 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge()
             --num_rows_to_insert;
 
             /// It's ok to return here, because we didn't affect queue.
-            if (merged_data.hasEnoughRows())
-                return Status(merged_data.pull());
+            if (merged_data->hasEnoughRows())
+                return Status(merged_data->pull());
         }
 
         if (current_keys.empty())
@@ -147,13 +146,13 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge()
         insertRow(gap, row);
         current_keys.popFront();
 
-        if (merged_data.hasEnoughRows())
-            return Status(merged_data.pull());
+        if (merged_data->hasEnoughRows())
+            return Status(merged_data->pull());
     }
 
     /// Write information about last collapsed rows.
     insertGap(current_keys.frontGap());
-    return Status(merged_data.pull(), true);
+    return Status(merged_data->pull(), true);
 }
 
 }
diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h
index d98529b301c..e6d20ddac75 100644
--- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h
@@ -29,8 +29,6 @@ public:
     Status merge() override;
 
 private:
-    MergedData merged_data;
-
     size_t sign_column_number = 0;
 
     const size_t max_rows_in_queue;
diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp
index b2e8e9bc89e..6736cd59e83 100644
--- a/src/Processors/Transforms/ColumnGathererTransform.cpp
+++ b/src/Processors/Transforms/ColumnGathererTransform.cpp
@@ -32,15 +32,23 @@ ColumnGathererStream::ColumnGathererStream(
 
 void ColumnGathererStream::initialize(Inputs inputs)
 {
+    Columns source_columns;
+    source_columns.reserve(inputs.size());
     for (size_t i = 0; i < inputs.size(); ++i)
     {
         if (inputs[i].chunk)
         {
             sources[i].update(inputs[i].chunk.detachColumns().at(0));
-            if (!result_column)
-                result_column = sources[i].column->cloneEmpty();
+            source_columns.push_back(sources[i].column);
         }
     }
+
+    if (source_columns.empty())
+        return;
+
+    result_column = source_columns[0]->cloneEmpty();
+    if (result_column->hasDynamicStructure())
+        result_column->takeDynamicStructureFromSourceColumns(source_columns);
 }
 
 IMergingAlgorithm::Status ColumnGathererStream::merge()
@@ -52,7 +60,16 @@ IMergingAlgorithm::Status ColumnGathererStream::merge()
     if (source_to_fully_copy) /// Was set on a previous iteration
     {
         Chunk res;
-        res.addColumn(source_to_fully_copy->column);
+        if (result_column->hasDynamicStructure())
+        {
+            auto col = result_column->cloneEmpty();
+            col->insertRangeFrom(*source_to_fully_copy->column, 0, source_to_fully_copy->column->size());
+            res.addColumn(std::move(col));
+        }
+        else
+        {
+            res.addColumn(source_to_fully_copy->column);
+        }
         merged_rows += source_to_fully_copy->size;
         source_to_fully_copy->pos = source_to_fully_copy->size;
         source_to_fully_copy = nullptr;
@@ -96,7 +113,16 @@ IMergingAlgorithm::Status ColumnGathererStream::merge()
         Chunk res;
         merged_rows += source_to_fully_copy->column->size();
         merged_bytes += source_to_fully_copy->column->allocatedBytes();
-        res.addColumn(source_to_fully_copy->column);
+        if (result_column->hasDynamicStructure())
+        {
+            auto col = result_column->cloneEmpty();
+            col->insertRangeFrom(*source_to_fully_copy->column, 0, source_to_fully_copy->column->size());
+            res.addColumn(std::move(col));
+        }
+        else
+        {
+            res.addColumn(source_to_fully_copy->column);
+        }
         source_to_fully_copy->pos = source_to_fully_copy->size;
         source_to_fully_copy = nullptr;
         return Status(std::move(res));
diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index eae5e1a8a47..db6a4d9f06e 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -1288,7 +1288,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
             /// Looks like there is something around default expression for this column (method `getDefault` is not implemented for the data type Object).
             /// But after ALTER TABLE ADD COLUMN we need to fill existing rows with something (exactly the default value).
             /// So we don't allow to do it for now.
-            if (command.data_type->hasDynamicSubcolumns())
+            if (command.data_type->hasDynamicSubcolumnsDeprecated())
                 throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. It has known bugs");
 
             if (virtuals->tryGet(column_name, VirtualsKind::Persistent))
@@ -1366,8 +1366,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
             const GetColumnsOptions options(GetColumnsOptions::All);
             const auto old_data_type = all_columns.getColumn(options, column_name).type;
 
-            bool new_type_has_object = command.data_type->hasDynamicSubcolumns();
-            bool old_type_has_object = old_data_type->hasDynamicSubcolumns();
+            bool new_type_has_object = command.data_type->hasDynamicSubcolumnsDeprecated();
+            bool old_type_has_object = old_data_type->hasDynamicSubcolumnsDeprecated();
 
             if (new_type_has_object || old_type_has_object)
                 throw Exception(
diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp
index 16b89f24243..6f844e31970 100644
--- a/src/Storages/ColumnsDescription.cpp
+++ b/src/Storages/ColumnsDescription.cpp
@@ -547,7 +547,18 @@ bool ColumnsDescription::hasNested(const String & column_name) const
 
 bool ColumnsDescription::hasSubcolumn(const String & column_name) const
 {
-    return subcolumns.get<0>().count(column_name);
+    if (subcolumns.get<0>().count(column_name))
+        return true;
+
+    auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name);
+    auto it = columns.get<1>().find(ordinary_column_name);
+    if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns())
+    {
+        if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name))
+            return true;
+    }
+
+    return false;
 }
 
 const ColumnDescription & ColumnsDescription::get(const String & column_name) const
@@ -644,6 +655,14 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns
         return *jt;
     }
 
+    auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name);
+    it = columns.get<1>().find(ordinary_column_name);
+    if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns())
+    {
+        if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name))
+            return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type);
+    }
+
     return {};
 }
@@ -730,9 +749,18 @@ bool ColumnsDescription::hasAlias(const String & column_name) const
 
 bool ColumnsDescription::hasColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const
 {
     auto it = columns.get<1>().find(column_name);
-    return (it != columns.get<1>().end()
-        && (defaultKindToGetKind(it->default_desc.kind) & kind))
-        || hasSubcolumn(column_name);
+    if ((it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & kind)) || hasSubcolumn(column_name))
+        return true;
+
+    auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name);
+    it = columns.get<1>().find(ordinary_column_name);
+    if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns())
+    {
+        if (it->type->hasSubcolumn(dynamic_subcolumn_name))
+            return true;
+    }
+
+    return false;
 }
 
 bool ColumnsDescription::hasColumnOrNested(GetColumnsOptions::Kind kind, const String & column_name) const
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index b14bb7f997b..785ddcd18f8 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -79,6 +79,8 @@ public:
 
     bool supportsSubcolumns() const override { return true; }
 
+    bool supportsDynamicSubcolumns() const override { return true; }
+
     static ColumnsDescription getTableStructureFromData(
         const String & format,
         const String & uri,
diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h
index 26ebc8601ee..448b4be6c96 100644
--- a/src/Storages/HDFS/StorageHDFSCluster.h
+++ b/src/Storages/HDFS/StorageHDFSCluster.h
@@ -36,6 +36,8 @@ public:
 
     bool supportsSubcolumns() const override { return true; }
 
+    bool supportsDynamicSubcolumns() const override { return true; }
+
     bool supportsTrivialCountOptimization() const override { return true; }
 
 private:
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 1108eafc6b6..5a23fcceeb9 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -172,8 +172,10 @@ public:
     /// This method can return true for readonly engines that return the same rows for reading (such as SystemNumbers)
     virtual bool supportsTransactions() const { return false; }
 
+    /// Returns true if the storage supports storing of data type Object.
+    virtual bool supportsDynamicSubcolumnsDeprecated() const { return false; }
+
     /// Returns true if the storage supports storing of dynamic subcolumns.
-    /// For now it makes sense only for data type Object.
     virtual bool supportsDynamicSubcolumns() const { return false; }
 
     /// Requires squashing small blocks to large for optimal storage.
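Before the MergeTree changes that follow, a brief illustration of the two capability flags added above (a hedged sketch, not part of the patch): the deprecated flag keeps gating the old Object type, while the new flag is what a storage overrides to announce support for types such as Dynamic, whose subcolumns are discovered from the data. A hypothetical engine would opt in like this:

    /// Hypothetical storage showing how the IStorage flags are meant to be used.
    class StorageSketch : public IStorage
    {
    public:
        /// Deprecated path: storing the old Object type.
        bool supportsDynamicSubcolumnsDeprecated() const override { return false; }

        /// New path: dynamic subcolumns discovered from data, e.g. the Dynamic type.
        bool supportsDynamicSubcolumns() const override { return true; }
    };

MergeTreeData below overrides both flags, since MergeTree tables support the deprecated Object type as well as the new behaviour.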
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 570175f6614..2e2d1dbed4d 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -2392,6 +2392,36 @@ void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const
     exception_code = code;
 }
 
+ColumnPtr IMergeTreeDataPart::readColumnSample(const NameAndTypePair & column) const
+{
+    const size_t total_mark = getMarksCount();
+    if (!total_mark)
+        return column.type->createColumn();
+
+    NamesAndTypesList cols;
+    cols.emplace_back(column);
+
+    StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr();
+    StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr);
+
+    MergeTreeReaderPtr reader = getReader(
+        cols,
+        storage_snapshot_ptr,
+        MarkRanges{MarkRange(0, 1)},
+        /*virtual_fields=*/ {},
+        /*uncompressed_cache=*/{},
+        storage.getContext()->getMarkCache().get(),
+        std::make_shared(),
+        MergeTreeReaderSettings{},
+        ValueSizeMap{},
+        ReadBufferFromFileBase::ProfileCallback{});
+
+    Columns result;
+    result.resize(1);
+    reader->readRows(0, 1, false, 0, result);
+    return result[0];
+}
+
 bool isCompactPart(const MergeTreeDataPartPtr & data_part)
 {
     return (data_part && data_part->getType() == MergeTreeDataPartType::Compact);
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index 7519980a7a3..78619f216c0 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -166,6 +166,8 @@ public:
     NameAndTypePair getColumn(const String & name) const;
     std::optional tryGetColumn(const String & column_name) const;
 
+    ColumnPtr readColumnSample(const NameAndTypePair & column) const;
+
     const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; }
 
     SerializationPtr getSerialization(const String & column_name) const;
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 08a2ff89e7b..c47297be84d 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3660,7 +3660,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts
             continue;
 
         auto storage_column = columns.getPhysical(part_column.name);
-        if (!storage_column.type->hasDynamicSubcolumns())
+        if (!storage_column.type->hasDynamicSubcolumnsDeprecated())
             continue;
 
         auto concrete_storage_column = object_columns.getPhysical(part_column.name);
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index 046376be474..089793beab8 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -434,6 +434,7 @@ public:
 
     bool supportsTTL() const override { return true; }
 
+    bool supportsDynamicSubcolumnsDeprecated() const override { return true; }
     bool supportsDynamicSubcolumns() const override { return true; }
 
     bool supportsLightweightDelete() const override;
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp
index 1605e5cdb9a..d0a685d95fc 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp
@@ -44,21 +44,27 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
         marks_source_hashing = std::make_unique(*marks_compressor);
     }
+}
+
+void MergeTreeDataPartWriterCompact::initStreamsIfNeeded(const Block & block)
+{
+    if (!compressed_streams.empty())
+        return;
 
     auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot);
     for (const auto & column : columns_list)
     {
         auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec);
-        addStreams(column, compression);
+        addStreams(column, block.getByName(column.name).column, compression);
     }
 }
 
-void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc)
+void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc)
 {
     ISerialization::StreamCallback callback = [&](const auto & substream_path)
     {
         assert(!substream_path.empty());
-        String stream_name = ISerialization::getFileNameForStream(column, substream_path);
+        String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path);
 
         /// Shared offsets for Nested type.
         if (compressed_streams.contains(stream_name))
@@ -81,7 +87,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column,
         compressed_streams.emplace(stream_name, stream);
     };
 
-    data_part->getSerialization(column.name)->enumerateStreams(callback, column.type);
+    data_part->getSerialization(name_and_type.name)->enumerateStreams(callback, name_and_type.type, column);
 }
 
 namespace
@@ -138,6 +144,7 @@ void writeColumnSingleGranule(
     serialize_settings.getter = stream_getter;
     serialize_settings.position_independent_encoding = true;
     serialize_settings.low_cardinality_max_dictionary_size = 0;
+    serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX;
 
     serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state);
     serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state);
@@ -148,6 +155,8 @@
 void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::Permutation * permutation)
 {
+    initStreamsIfNeeded(block);
+
     /// Fill index granularity for this block
     /// if it's unknown (in case of insert data or horizontal merge,
     /// but not in case of vertical merge)
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h
index ddb6178dce6..1c748803c52 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h
@@ -42,7 +42,9 @@ private:
 
     void addToChecksums(MergeTreeDataPartChecksums & checksums);
 
-    void addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc);
+    void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc);
+
+    void initStreamsIfNeeded(const Block & block);
 
     Block header;
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
index 6a3b08d4d65..c23a9a81cbc 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
@@ -89,16 +89,25 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
            indices_to_recalc_, stats_to_recalc_, marks_file_extension_,
            default_codec_, settings_, index_granularity_)
 {
+}
+
+void MergeTreeDataPartWriterWide::initStreamsIfNeeded(const DB::Block & block)
+{
+    if (!column_streams.empty())
+        return;
+
+    block_sample = block.cloneEmpty();
 
     auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot);
     for (const auto & column : columns_list)
     {
         auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec);
-        addStreams(column, compression);
+        addStreams(column, block_sample.getByName(column.name).column, compression);
     }
 }
 
 void MergeTreeDataPartWriterWide::addStreams(
-    const NameAndTypePair & column,
+    const NameAndTypePair & name_and_type,
+    const ColumnPtr & column,
     const ASTPtr & effective_codec_desc)
 {
     ISerialization::StreamCallback callback = [&](const auto & substream_path)
@@ -106,7 +115,7 @@ void MergeTreeDataPartWriterWide::addStreams(
         assert(!substream_path.empty());
 
         auto storage_settings = storage.getSettings();
-        auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path);
+        auto full_stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path);
 
         String stream_name;
         if (storage_settings->replace_long_file_name_to_hash && full_stream_name.size() > storage_settings->max_file_name_length)
@@ -138,7 +147,7 @@ void MergeTreeDataPartWriterWide::addStreams(
         auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
         CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr);
 
-        const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage());
+        const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), name_and_type.getNameInStorage());
 
         UInt64 max_compress_block_size = 0;
         if (column_desc)
@@ -163,7 +172,7 @@ void MergeTreeDataPartWriterWide::addStreams(
     };
 
     ISerialization::SubstreamPath path;
-    data_part->getSerialization(column.name)->enumerateStreams(callback, column.type);
+    data_part->getSerialization(name_and_type.name)->enumerateStreams(callback, name_and_type.type, column);
 }
 
 const String & MergeTreeDataPartWriterWide::getStreamName(
@@ -222,6 +231,8 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri
 
 void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Permutation * permutation)
 {
+    initStreamsIfNeeded(block);
+
     /// Fill index granularity for this block
     /// if it's unknown (in case of insert data or horizontal merge,
     /// but not in case of vertical part of vertical merge)
@@ -302,11 +313,12 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
 }
 
 void MergeTreeDataPartWriterWide::writeSingleMark(
-    const NameAndTypePair & column,
+    const NameAndTypePair & name_and_type,
     WrittenOffsetColumns & offset_columns,
     size_t number_of_rows)
 {
-    StreamsWithMarks marks = getCurrentMarksForColumn(column, offset_columns);
+    auto * sample_column = block_sample.findByName(name_and_type.name);
+    StreamsWithMarks marks = getCurrentMarksForColumn(name_and_type, sample_column ? sample_column->column : nullptr, offset_columns);
     for (const auto & mark : marks)
         flushMarkToFile(mark, number_of_rows);
 }
@@ -323,21 +335,22 @@ void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stre
 }
 
 StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
-    const NameAndTypePair & column,
+    const NameAndTypePair & name_and_type,
+    const ColumnPtr & column_sample,
     WrittenOffsetColumns & offset_columns)
 {
     StreamsWithMarks result;
-    const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage());
+    const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), name_and_type.getNameInStorage());
     UInt64 min_compress_block_size = 0;
     if (column_desc)
         if (const auto * value = column_desc->settings.tryGet("min_compress_block_size"))
             min_compress_block_size = value->safeGet();
     if (!min_compress_block_size)
         min_compress_block_size = settings.min_compress_block_size;
 
-    data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path)
+    data_part->getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path)
     {
         bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes;
 
-        auto stream_name = getStreamName(column, substream_path);
+        auto stream_name = getStreamName(name_and_type, substream_path);
 
         /// Don't write offsets more than one time for Nested type.
         if (is_offsets && offset_columns.contains(stream_name))
@@ -355,7 +368,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
         stream_with_mark.mark.offset_in_decompressed_block = stream.compressed_hashing.offset();
 
         result.push_back(stream_with_mark);
-    });
+    }, name_and_type.type, column_sample);
 
     return result;
 }
@@ -382,7 +395,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule(
             return;
 
         column_streams.at(stream_name)->compressed_hashing.nextIfAtEnd();
-    });
+    }, name_and_type.type, column.getPtr());
 }
 
 /// Column must not be empty. (column.size() !== 0)
@@ -424,7 +437,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
                 "We have to add new mark for column, but already have non written mark. "
                 "Current mark {}, total marks {}, offset {}",
                 getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark);
-        last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, offset_columns);
+        last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, column.getPtr(), offset_columns);
     }
 
     writeSingleGranule(
@@ -453,7 +466,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
             bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes;
             if (is_offsets)
                 offset_columns.insert(getStreamName(name_and_type, substream_path));
-        });
+        }, name_and_type.type, column.getPtr());
 }
 
 
@@ -622,6 +635,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum
         if (!serialization_states.empty())
         {
             serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? *written_offset_columns : offset_columns);
+            serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX;
             data_part->getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]);
         }
 
@@ -703,17 +717,17 @@ void MergeTreeDataPartWriterWide::finish(bool sync)
 }
 
 void MergeTreeDataPartWriterWide::writeFinalMark(
-    const NameAndTypePair & column,
+    const NameAndTypePair & name_and_type,
     WrittenOffsetColumns & offset_columns)
 {
-    writeSingleMark(column, offset_columns, 0);
+    writeSingleMark(name_and_type, offset_columns, 0);
     /// Memoize information about offsets
-    data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path)
+    data_part->getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path)
     {
         bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes;
         if (is_offsets)
-            offset_columns.insert(getStreamName(column, substream_path));
-    });
+            offset_columns.insert(getStreamName(name_and_type, substream_path));
+    }, name_and_type.type, block_sample.getByName(name_and_type.name).column);
 }
 
 static void fillIndexGranularityImpl(
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
index f5ff323563d..ebdd907914f 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
@@ -63,7 +63,8 @@ private:
 
     /// Take offsets from column and return as MarkInCompressed file with stream name
     StreamsWithMarks getCurrentMarksForColumn(
-        const NameAndTypePair & column,
+        const NameAndTypePair & name_and_type,
+        const ColumnPtr & column_sample,
         WrittenOffsetColumns & offset_columns);
 
     /// Write mark to disk using stream and rows count
@@ -73,18 +74,21 @@ private:
 
     /// Write mark for column taking offsets from column stream
     void writeSingleMark(
-        const NameAndTypePair & column,
+        const NameAndTypePair & name_and_type,
        WrittenOffsetColumns & offset_columns,
         size_t number_of_rows);
 
     void writeFinalMark(
-        const NameAndTypePair & column,
+        const NameAndTypePair & name_and_type,
         WrittenOffsetColumns & offset_columns);
 
     void addStreams(
-        const NameAndTypePair & column,
+        const NameAndTypePair & name_and_type,
+        const ColumnPtr & column,
         const ASTPtr & effective_codec_desc);
 
+    void initStreamsIfNeeded(const Block & block);
+
     /// Method for self check (used in debug-build only). Checks that written
     /// data and corresponding marks are consistent. Otherwise throws logical
     /// errors.
@@ -129,6 +133,8 @@ private:
     /// How many rows we have already written in the current mark.
     /// More than zero when incoming blocks are smaller then their granularity.
     size_t rows_written_in_last_mark = 0;
+
+    Block block_sample;
 };
 
 }
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index cadd94867ec..ad60e31dddc 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -422,7 +422,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
 
     auto columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames());
     for (auto & column : columns)
-        if (column.type->hasDynamicSubcolumns())
+        if (column.type->hasDynamicSubcolumnsDeprecated())
             column.type = block.getByName(column.name).type;
 
     auto minmax_idx = std::make_shared();
diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp
index dba2bc1e56c..02a3f1b1165 100644
--- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp
@@ -116,7 +116,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd
         ISerialization::DeserializeBinaryBulkStatePtr state;
         auto serialization = type->getDefaultSerialization();
 
-        serialization->deserializeBinaryBulkStatePrefix(settings, state);
+        serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr);
         serialization->deserializeBinaryBulkWithMultipleStreams(new_column, rows_to_read, settings, state, nullptr);
 
         block.insert(ColumnWithTypeAndName(new_column, type, column.name));
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
index a22bff6b8d2..7504ce3cc5f 100644
--- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
@@ -195,7 +195,7 @@ void MergeTreeReaderCompact::readPrefix(
             deserialize_settings.getter = buffer_getter_for_prefix;
             ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix;
 
-            serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix);
+            serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix, nullptr);
         }
 
         SerializationPtr serialization;
@@ -206,7 +206,8 @@ void MergeTreeReaderCompact::readPrefix(
 
         deserialize_settings.getter = buffer_getter;
 
-        serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name]);
+        deserialize_settings.dynamic_read_statistics = true;
+        serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name], nullptr);
     }
     catch (Exception & e)
     {
diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp
index 394a22835f1..c8bf12436b0 100644
--- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 #include
 
@@ -43,11 +44,13 @@ MergeTreeReaderWide::MergeTreeReaderWide(
         mark_ranges_,
         settings_,
         avg_value_size_hints_)
+    , profile_callback(profile_callback_)
+    , clock_type(clock_type_)
 {
     try
     {
         for (size_t i = 0; i < columns_to_read.size(); ++i)
-            addStreams(columns_to_read[i], serializations[i], profile_callback_, clock_type_);
+            addStreams(columns_to_read[i], serializations[i]);
     }
     catch (...)
     {
@@ -100,9 +103,10 @@ void MergeTreeReaderWide::prefetchForAllColumns(
         try
         {
             auto & cache = caches[columns_to_read[pos].getNameInStorage()];
+            auto & deserialize_states_cache = deserialize_states_caches[columns_to_read[pos].getNameInStorage()];
             prefetchForColumn(
                 priority, columns_to_read[pos], serializations[pos], from_mark, continue_reading,
-                current_task_last_mark, cache);
+                current_task_last_mark, cache, deserialize_states_cache);
         }
         catch (Exception & e)
         {
@@ -147,11 +151,12 @@ size_t MergeTreeReaderWide::readRows(
             {
                 size_t column_size_before_reading = column->size();
                 auto & cache = caches[column_to_read.getNameInStorage()];
+                auto & deserialize_states_cache = deserialize_states_caches[column_to_read.getNameInStorage()];
 
                 readData(
                     column_to_read, serializations[pos], column,
                     from_mark, continue_reading, current_task_last_mark,
-                    max_rows_to_read, cache, /* was_prefetched =*/ !prefetched_streams.empty());
+                    max_rows_to_read, cache, deserialize_states_cache, /* was_prefetched =*/ !prefetched_streams.empty());
 
                 /// For elements of Nested, column_size_before_reading may be greater than column size
                 /// if offsets are not empty and were already read, but elements are empty.
@@ -199,9 +204,7 @@ size_t MergeTreeReaderWide::readRows(
 
 void MergeTreeReaderWide::addStreams(
     const NameAndTypePair & name_and_type,
-    const SerializationPtr & serialization,
-    const ReadBufferFromFileBase::ProfileCallback & profile_callback,
-    clockid_t clock_type)
+    const SerializationPtr & serialization)
 {
     bool has_any_stream = false;
     bool has_all_streams = true;
@@ -225,29 +228,8 @@ void MergeTreeReaderWide::addStreams(
             return;
         }
 
-        auto context = data_part_info_for_read->getContext();
-        auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? &context->getLoadMarksThreadpool() : nullptr;
-
-        auto marks_loader = std::make_shared(
-            data_part_info_for_read,
-            mark_cache,
-            data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name),
-            data_part_info_for_read->getMarksCount(),
-            data_part_info_for_read->getIndexGranularityInfo(),
-            settings.save_marks_in_cache,
-            settings.read_settings,
-            load_marks_threadpool,
-            /*num_columns_in_mark=*/ 1);
-
+        addStream(substream_path, *stream_name);
         has_any_stream = true;
-        auto stream_settings = settings;
-        stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys;
-
-        streams.emplace(*stream_name, std::make_unique(
-            data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION,
-            data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings,
-            uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION),
-            std::move(marks_loader), profile_callback, clock_type));
    };
 
     serialization->enumerateStreams(callback);
@@ -256,11 +238,36 @@ void MergeTreeReaderWide::addStreams(
         partially_read_columns.insert(name_and_type.name);
 }
 
-static ReadBuffer * getStream(
+MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const ISerialization::SubstreamPath & substream_path, const String & stream_name)
+{
+    auto context = data_part_info_for_read->getContext();
+    auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? &context->getLoadMarksThreadpool() : nullptr;
+
+    auto marks_loader = std::make_shared(
+        data_part_info_for_read,
+        mark_cache,
+        data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(stream_name),
+        data_part_info_for_read->getMarksCount(),
+        data_part_info_for_read->getIndexGranularityInfo(),
+        settings.save_marks_in_cache,
+        settings.read_settings,
+        load_marks_threadpool,
+        /*num_columns_in_mark=*/ 1);
+
+    auto stream_settings = settings;
+    stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys;
+
+    return streams.emplace(stream_name, std::make_unique(
+        data_part_info_for_read->getDataPartStorage(), stream_name, DATA_FILE_EXTENSION,
+        data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings,
+        uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION),
+        std::move(marks_loader), profile_callback, clock_type)).first;
+}
+
+ReadBuffer * MergeTreeReaderWide::getStream(
     bool seek_to_start,
     const ISerialization::SubstreamPath & substream_path,
     const MergeTreeDataPartChecksums & checksums,
-    MergeTreeReaderWide::FileStreams & streams,
     const NameAndTypePair & name_and_type,
     size_t from_mark,
     bool seek_to_mark,
@@ -277,7 +284,13 @@
     auto it = streams.find(*stream_name);
     if (it == streams.end())
-        return nullptr;
+    {
+        /// If we didn't create the requested stream but a file with this path exists, create a stream for it.
+        /// This may happen while reading columns with dynamic subcolumns, because the full set of streams
+        /// becomes known only after the binary bulk prefix has been deserialized.
+
+        it = addStream(substream_path, *stream_name);
+    }
 
     MergeTreeReaderStream & stream = *it->second;
     stream.adjustRightMark(current_task_last_mark);
@@ -294,17 +307,19 @@ void MergeTreeReaderWide::deserializePrefix(
     const SerializationPtr & serialization,
     const NameAndTypePair & name_and_type,
     size_t current_task_last_mark,
-    ISerialization::SubstreamsCache & cache)
+    ISerialization::SubstreamsCache & cache,
+    ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache)
 {
     const auto & name = name_and_type.name;
     if (!deserialize_binary_bulk_state_map.contains(name))
     {
         ISerialization::DeserializeBinaryBulkSettings deserialize_settings;
+        deserialize_settings.dynamic_read_statistics = true;
         deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path)
         {
-            return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache);
+            return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache);
         };
-        serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]);
+        serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name], &deserialize_states_cache);
     }
 }
 
@@ -315,9 +330,10 @@ void MergeTreeReaderWide::prefetchForColumn(
     size_t from_mark,
     bool continue_reading,
     size_t current_task_last_mark,
-    ISerialization::SubstreamsCache & cache)
+    ISerialization::SubstreamsCache & cache,
+    ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache)
 {
-    deserializePrefix(serialization, name_and_type, current_task_last_mark, cache);
+
deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { @@ -326,7 +342,7 @@ void MergeTreeReaderWide::prefetchForColumn( if (stream_name && !prefetched_streams.contains(*stream_name)) { bool seek_to_mark = !continue_reading; - if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) + if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); prefetched_streams.insert(*stream_name); @@ -337,15 +353,22 @@ void MergeTreeReaderWide::prefetchForColumn( void MergeTreeReaderWide::readData( - const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, - size_t from_mark, bool continue_reading, size_t current_task_last_mark, - size_t max_rows_to_read, ISerialization::SubstreamsCache & cache, bool was_prefetched) + const NameAndTypePair & name_and_type, + const SerializationPtr & serialization, + ColumnPtr & column, + size_t from_mark, + bool continue_reading, + size_t current_task_last_mark, + size_t max_rows_to_read, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, + bool was_prefetched) { double & avg_value_size_hint = avg_value_size_hints[name_and_type.name]; ISerialization::DeserializeBinaryBulkSettings deserialize_settings; deserialize_settings.avg_value_size_hint = avg_value_size_hint; - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { @@ -353,7 +376,7 @@ void MergeTreeReaderWide::readData( return getStream( /* seek_to_start = */false, substream_path, - data_part_info_for_read->getChecksums(), streams, + data_part_info_for_read->getChecksums(), name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache); }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index a9a5526dd65..1eef21b455b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -45,14 +45,31 @@ private: void addStreams( const NameAndTypePair & name_and_type, - const SerializationPtr & serialization, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, - clockid_t clock_type); + const SerializationPtr & serialization); + + ReadBuffer * getStream( + bool seek_to_start, + const ISerialization::SubstreamPath & substream_path, + const MergeTreeDataPartChecksums & checksums, + const NameAndTypePair & name_and_type, + size_t from_mark, + bool seek_to_mark, + size_t current_task_last_mark, + ISerialization::SubstreamsCache & cache); + + FileStreams::iterator addStream(const ISerialization::SubstreamPath & substream_path, const String & stream_name); void readData( - const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, - size_t from_mark, bool continue_reading, size_t current_task_last_mark, size_t max_rows_to_read, - ISerialization::SubstreamsCache & cache, bool was_prefetched); + const NameAndTypePair & name_and_type, + const 
SerializationPtr & serialization, + ColumnPtr & column, + size_t from_mark, + bool continue_reading, + size_t current_task_last_mark, + size_t max_rows_to_read, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, + bool was_prefetched); /// Make next readData more simple by calling 'prefetch' of all related ReadBuffers (column streams). void prefetchForColumn( @@ -62,17 +79,22 @@ private: size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); void deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); std::unordered_map caches; + std::unordered_map deserialize_states_caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; + ReadBufferFromFileBase::ProfileCallback profile_callback; + clockid_t clock_type; }; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 9c67a86997b..3ddd6b21ffb 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,7 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ + /** M(UInt64, max_types_for_dynamic_serialization, 32, "The maximum number of different types in Dynamic column stored separately in MergeTree tables in wide format. If exceeded, new types will be converted to String", 0) */ \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f67e9484598..b2817b386fa 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -777,7 +777,13 @@ static NameToNameVector collectFilesForRenames( }; if (auto serialization = source_part->tryGetSerialization(command.column_name)) - serialization->enumerateStreams(callback); + { + auto name_and_type = source_part->getColumn(command.column_name); + ColumnPtr column_sample; + if (name_and_type.type->hasDynamicSubcolumns()) + column_sample = source_part->readColumnSample(name_and_type); + serialization->enumerateStreams(callback, name_and_type.type, column_sample); + } /// if we drop a column with statistic, we should also drop the stat file. 
if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) @@ -813,7 +819,13 @@ static NameToNameVector collectFilesForRenames( }; if (auto serialization = source_part->tryGetSerialization(command.column_name)) - serialization->enumerateStreams(callback); + { + auto name_and_type = source_part->getColumn(command.column_name); + ColumnPtr column_sample; + if (name_and_type.type->hasDynamicSubcolumns()) + column_sample = source_part->readColumnSample(name_and_type); + serialization->enumerateStreams(callback, name_and_type.type, column_sample); + } /// if we rename a column with statistic, we should also rename the stat file. if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index ca8ed9abdb5..a94508ad41f 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -87,6 +87,7 @@ public: bool supportsPrewhere() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 1f735b47819..fce6736aa07 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -81,6 +81,7 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); std::shared_ptr createSource( diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 27ac7a5c368..be0e88b9b6d 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -98,6 +98,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsTrivialCountOptimization() const override { return true; } diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 545e568a772..9521ae4d24e 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -35,6 +35,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 6c15c7e0238..cd6dd7b933f 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -89,6 +89,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool /*async_insert*/) override; void startup() override; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 12c2ad331ad..5d499fb319b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp 
@@ -712,7 +712,7 @@ static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr auto name_in_storage = Nested::splitName(required_column).first; auto column_in_storage = all_columns.tryGetPhysical(name_in_storage); - if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns()) + if (column_in_storage && column_in_storage->type->hasDynamicSubcolumnsDeprecated()) return true; } diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 3a7e63aef50..85a8de86953 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -85,6 +85,7 @@ public: bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } StoragePolicyPtr getStoragePolicy() const override; diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h index e9d8f90f755..a07a5600870 100644 --- a/src/Storages/StorageDummy.h +++ b/src/Storages/StorageDummy.h @@ -20,6 +20,7 @@ public: bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool canMoveConditionsToPrewhere() const override { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 93c263008a6..566c407a798 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -90,6 +90,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index 3acbc71ba7e..b8bb3fd5ea1 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -32,6 +32,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 1ac739f03fd..fcd14fb8ec1 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -628,7 +628,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns) const auto * available_type = it->getMapped(); - if (!available_type->hasDynamicSubcolumns() + if (!available_type->hasDynamicSubcolumnsDeprecated() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( @@ -676,7 +676,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const auto * provided_column_type = it->getMapped(); const auto * available_column_type = jt->getMapped(); - if (!provided_column_type->hasDynamicSubcolumns() + if (!provided_column_type->hasDynamicSubcolumnsDeprecated() && !provided_column_type->equals(*available_column_type) && !isCompatibleEnumTypes(available_column_type, provided_column_type)) throw Exception( @@ -720,7 +720,7 @@ void 
StorageInMemoryMetadata::check(const Block & block, bool need_all) const listOfColumns(available_columns)); const auto * available_type = it->getMapped(); - if (!available_type->hasDynamicSubcolumns() + if (!available_type->hasDynamicSubcolumnsDeprecated() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 549cfca1b6c..7f09236454c 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -252,7 +252,7 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu if (!deserialize_states.contains(name)) { settings.getter = create_stream_getter(true); - serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]); + serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name], nullptr); } settings.getter = create_stream_getter(false); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 198b7a642ee..0d906a933f7 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -32,6 +32,7 @@ public: bool supportsFinal() const override { return getTargetTable()->supportsFinal(); } bool supportsParallelInsert() const override { return getTargetTable()->supportsParallelInsert(); } bool supportsSubcolumns() const override { return getTargetTable()->supportsSubcolumns(); } + bool supportsDynamicSubcolumns() const override { return getTargetTable()->supportsDynamicSubcolumns(); } bool supportsTransactions() const override { return getTargetTable()->supportsTransactions(); } SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 13f1c971d82..ef422a6c872 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -58,6 +58,7 @@ public: bool supportsParallelInsert() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } /// Smaller blocks (e.g. 64K rows) are better for CPU cache. 
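The `supportsDynamicSubcolumns()` overrides added to the storages above advertise support for reading typed subcolumns of the new `Dynamic` type, while the `supportsDynamicSubcolumnsDeprecated()` variants keep the pre-existing Object-type subcolumn path separate. A minimal SQL sketch of what the new flag enables, assuming only the experimental setting and subcolumn syntax exercised by the tests added later in this patch (the table name `dyn_demo` is illustrative):

set allow_experimental_dynamic_type = 1;

-- A single Dynamic column may hold a different type in every row.
create table dyn_demo (id UInt64, d Dynamic) engine=Memory;
insert into dyn_demo values (1, 42), (2, 'str'), (3, [1, 2, 3]);

-- Typed subcolumn reads: rows holding another type come back as NULL
-- (or as an empty array for array subcolumns).
select d.Int64, d.String, dynamicType(d) from dyn_demo;

drop table dyn_demo;

Each engine has to opt in explicitly because subcolumn resolution is decided per table engine; engines that merely proxy another table (e.g. StorageMaterializedView above) forward the decision to their target table.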
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index c049d50f3b4..b08bef0a143 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -49,6 +49,7 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } bool supportsPrewhere() const override { return tableSupportsPrewhere(); } std::optional supportedPrewhereColumns() const override; diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index f7ee936db8d..74abf931f8f 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -48,6 +48,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, bool) override { return std::make_shared(metadata_snapshot->getSampleBlock()); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index d1f15edfd6d..3a20872bbe4 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -385,6 +385,8 @@ private: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 6a5b03e682f..3ec84b363fb 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -32,6 +32,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } protected: diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 8b087a4a2bc..aada25168f8 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -115,7 +115,7 @@ std::optional StorageSnapshot::tryGetColumn(const GetColumnsOpt { const auto & columns = getMetadataForQuery()->getColumns(); auto column = columns.tryGetColumn(options, column_name); - if (column && (!column->type->hasDynamicSubcolumns() || !options.with_extended_objects)) + if (column && (!column->type->hasDynamicSubcolumnsDeprecated() || !options.with_extended_objects)) return column; if (options.with_extended_objects) diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 842cfd5b627..3fd7a7f097f 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -295,6 +295,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index dce2e0106ea..ad8113517c5 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -35,6 +35,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/getStructureOfRemoteTable.cpp 
b/src/Storages/getStructureOfRemoteTable.cpp index 26e953c0578..6ea7bdc312d 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -210,7 +210,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( auto type_name = type_col[i].get(); auto storage_column = storage_columns.tryGetPhysical(name); - if (storage_column && storage_column->type->hasDynamicSubcolumns()) + if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) res.add(ColumnDescription(std::move(name), DataTypeFactory::instance().get(type_name))); } } diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh index b816a20c818..6bbd127d933 100755 --- a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1 --max_insert_threads 4 --group_by_two_level_threshold 752249 --group_by_two_level_threshold_bytes 15083870 --distributed_aggregation_memory_efficient 1 --fsync_metadata 1 --output_format_parallel_formatting 0 --input_format_parallel_parsing 0 --min_chunk_bytes_for_parallel_parsing 6583861 --max_read_buffer_size 640584 --prefer_localhost_replica 1 --max_block_size 38844 --max_threads 48 --optimize_append_index 0 --optimize_if_chain_to_multiif 1 --optimize_if_transform_strings_to_enum 0 --optimize_read_in_order 1 --optimize_or_like_chain 0 --optimize_substitute_columns 1 --enable_multiple_prewhere_read_steps 1 --read_in_order_two_level_merge_threshold 4 --optimize_aggregation_in_order 0 --aggregation_in_order_max_block_bytes 18284646 --use_uncompressed_cache 1 --min_bytes_to_use_direct_io 10737418240 --min_bytes_to_use_mmap_io 10737418240 --local_filesystem_read_method pread --remote_filesystem_read_method read --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 0 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 --throw_on_error_from_cache_on_write_operations 1 --remote_filesystem_read_prefetch 0 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 128Mi --filesystem_prefetches_limit 0 --filesystem_prefetch_min_bytes_for_single_read_task 16Mi --filesystem_prefetch_step_marks 50 --filesystem_prefetch_step_bytes 0 --compile_aggregate_expressions 1 --compile_sort_description 0 --merge_tree_coarse_index_granularity 31 --optimize_distinct_in_order 1 --max_bytes_before_external_sort 1 --max_bytes_before_external_group_by 1 --max_bytes_before_remerge_sort 2640239625 --min_compress_block_size 3114155 --max_compress_block_size 226550 --merge_tree_compact_parts_min_granules_to_multibuffer_read 118 --optimize_sorting_by_input_stream_properties 0 --http_response_buffer_size 543038 --http_wait_end_of_query False --enable_memory_bound_merging_of_aggregation_results 1 --min_count_to_compile_expression 3 --min_count_to_compile_aggregate_expression 3 --min_count_to_compile_sort_description 0 --session_timezone America/Mazatlan --prefer_warmed_unmerged_parts_seconds 8 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True 
--merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.82 " function test() diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference new file mode 100644 index 00000000000..d965245266c --- /dev/null +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference @@ -0,0 +1,55 @@ +JSON +{"d":"42","dynamicType(d)":"Int64"} +{"d":42.42,"dynamicType(d)":"Float64"} +{"d":"str","dynamicType(d)":"String"} +{"d":["1","2","3"],"dynamicType(d)":"Array(Int64)"} +{"d":"2020-01-01","dynamicType(d)":"Date"} +{"d":"2020-01-01 10:00:00.000000000","dynamicType(d)":"DateTime64(9)"} +{"d":{"a":"42","b":"str"},"dynamicType(d)":"Tuple(a Int64, b String)"} +{"d":{"a":"43"},"dynamicType(d)":"Tuple(a Int64)"} +{"d":{"a":"44","c":["1","2","3"]},"dynamicType(d)":"Tuple(a Int64, c Array(Int64))"} +{"d":["1","str",["1","2","3"]],"dynamicType(d)":"Tuple(Int64, String, Array(Int64))"} +{"d":null,"dynamicType(d)":"None"} +{"d":true,"dynamicType(d)":"Bool"} +{"d":"42","dynamicType(d)":"Int64"} +{"d":"42.42","dynamicType(d)":"String"} +{"d":"str","dynamicType(d)":"String"} +{"d":null,"dynamicType(d)":"None"} +{"d":"1","dynamicType(d)":"Int64"} +CSV +42,"Int64" +42.42,"Float64" +"str","String" +"[1,2,3]","Array(Int64)" +"2020-01-01","Date" +"2020-01-01 10:00:00.000000000","DateTime64(9)" +"[1, 'str', [1, 2, 3]]","String" +\N,"None" +true,"Bool" +TSV +42 Int64 +42.42 Float64 +str String +[1,2,3] Array(Int64) +2020-01-01 Date +2020-01-01 10:00:00.000000000 DateTime64(9) +[1, \'str\', [1, 2, 3]] String +\N None +true Bool +Values +(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00.000000000','DateTime64(9)'),(NULL,'None'),(true,'Bool') +Cast using parsing +42 Int64 +42.42 Float64 +[1,2,3] Array(Int64) +2020-01-01 Date +2020-01-01 10:00:00.000000000 DateTime64(9) +\N None +true Bool +42 Int64 +42.42 Float64 +[1, 2, 3] String +2020-01-01 String +2020-01-01 10:00:00 String +\N None +true String diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.sql b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql new file mode 100644 index 00000000000..d12d110fe28 --- /dev/null +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql @@ -0,0 +1,74 @@ +set allow_experimental_dynamic_type = 1; + +select 'JSON'; +select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic', $$ +{"d" : 42} +{"d" : 42.42} +{"d" : "str"} +{"d" : [1, 2, 3]} +{"d" : "2020-01-01"} +{"d" : "2020-01-01 10:00:00"} +{"d" : {"a" : 42, "b" : "str"}} +{"d" : {"a" : 43}} +{"d" : {"a" : 44, "c" : [1, 2, 3]}} +{"d" : [1, "str", [1, 2, 3]]} +{"d" : null} +{"d" : true} +$$) format JSONEachRow; + +select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic(max_types=2)', $$ +{"d" : 42} +{"d" : 42.42} +{"d" : "str"} +{"d" : null} +{"d" : true} +$$) format JSONEachRow; + +select 'CSV'; +select d, dynamicType(d) from format(CSV, 'd Dynamic', +$$42 +42.42 +"str" +"[1, 2, 3]" +"2020-01-01" +"2020-01-01 10:00:00" +"[1, 'str', [1, 2, 3]]" +\N +true +$$) format CSV; + +select 'TSV'; +select d, dynamicType(d) from format(TSV, 'd Dynamic', +$$42 +42.42 +str +[1, 2, 3] +2020-01-01 +2020-01-01 10:00:00 +[1, 'str', [1, 2, 3]] +\N +true +$$) format TSV; + +select 'Values'; +select d, dynamicType(d) from format(Values, 'd Dynamic', $$ +(42) +(42.42) +('str') +([1, 2, 3]) +('2020-01-01') +('2020-01-01 10:00:00') +(NULL) +(true) +$$) format 
Values; +select ''; + +select 'Cast using parsing'; +drop table if exists test; +create table test (s String) engine=Memory; +insert into test values ('42'), ('42.42'), ('[1, 2, 3]'), ('2020-01-01'), ('2020-01-01 10:00:00'), ('NULL'), ('true'); +set cast_string_to_dynamic_use_inference=1; +select s::Dynamic as d, dynamicType(d) from test; +select s::Dynamic(max_types=3) as d, dynamicType(d) from test; +drop table test; + diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.reference b/tests/queries/0_stateless/03034_dynamic_conversions.reference new file mode 100644 index 00000000000..af91add9ddd --- /dev/null +++ b/tests/queries/0_stateless/03034_dynamic_conversions.reference @@ -0,0 +1,63 @@ +0 UInt64 +1 UInt64 +2 UInt64 +0 String +1 String +2 String +0 +1 +2 +0 +1 +2 +1970-01-01 +1970-01-02 +1970-01-03 +0 UInt64 +1 UInt64 +2 UInt64 +0 UInt64 +\N None +2 UInt64 +0 UInt64 +str_1 String +[0,1] Array(UInt64) +\N None +4 UInt64 +str_5 String +0 String +str_1 String +[0,1] String +\N None +4 String +str_5 String +0 UInt64 +str_1 String +[0,1] String +\N None +4 UInt64 +str_5 String +0 UInt64 +str_1 String +[0,1] Array(UInt64) +\N None +4 UInt64 +str_5 String +0 +1 +2 +0 +1 +2 +0 UInt64 +str_1 String +[0,1] String +\N None +4 UInt64 +str_5 String +0 UInt64 +1970-01-02 Date +[0,1] String +\N None +4 UInt64 +1970-01-06 Date diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.sql b/tests/queries/0_stateless/03034_dynamic_conversions.sql new file mode 100644 index 00000000000..e9b4944f5d8 --- /dev/null +++ b/tests/queries/0_stateless/03034_dynamic_conversions.sql @@ -0,0 +1,24 @@ +set allow_experimental_dynamic_type=1; +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +select number::Dynamic as d, dynamicType(d) from numbers(3); +select number::Dynamic(max_types=1) as d, dynamicType(d) from numbers(3); +select number::Dynamic::UInt64 as v from numbers(3); +select number::Dynamic::String as v from numbers(3); +select number::Dynamic::Date as v from numbers(3); +select number::Dynamic::Array(UInt64) as v from numbers(3); -- {serverError TYPE_MISMATCH} +select number::Dynamic::Variant(UInt64, String) as v, variantType(v) from numbers(3); +select (number % 2 ? 
NULL : number)::Dynamic as d, dynamicType(d) from numbers(3); + +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=1) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); + +select number::Dynamic(max_types=2)::Dynamic(max_types=3) as d from numbers(3); +select number::Dynamic(max_types=2)::Dynamic(max_types=1) as d from numbers(3); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, toDate(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=4)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); + + diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.reference b/tests/queries/0_stateless/03035_dynamic_sorting.reference new file mode 100644 index 00000000000..9b8df11c7a9 --- /dev/null +++ b/tests/queries/0_stateless/03035_dynamic_sorting.reference @@ -0,0 +1,299 @@ +order by d1 nulls first +\N None +\N None +\N None +\N None +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +order by d1 nulls last +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +\N None +\N None +\N None +\N None +order by d2 nulls first +\N None +\N None +\N None +\N None +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +order by d2 nulls last +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +\N None +\N None +\N None +\N None +order by d1, d2 nulls first +[1,2,3] \N Array(Int64) None +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +[1,2,3] abc Array(Int64) String +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 \N Int64 None +42 [1,2,3] Int64 Array(Int64) +42 42 Int64 Int64 +42 43 Int64 Int64 +42 abc Int64 String +43 42 Int64 Int64 +abc \N String None +abc [1,2,3] String Array(Int64) +abc 42 String Int64 +abc abc String String +abc abd String String +abd abc String String +\N \N None None +\N [1,2,3] None Array(Int64) +\N 42 None Int64 +\N abc None String +order by d1, 
d2 nulls last +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +[1,2,3] abc Array(Int64) String +[1,2,3] \N Array(Int64) None +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +42 42 Int64 Int64 +42 43 Int64 Int64 +42 abc Int64 String +42 \N Int64 None +43 42 Int64 Int64 +abc [1,2,3] String Array(Int64) +abc 42 String Int64 +abc abc String String +abc abd String String +abc \N String None +abd abc String String +\N [1,2,3] None Array(Int64) +\N 42 None Int64 +\N abc None String +\N \N None None +order by d2, d1 nulls first +\N [1,2,3] None Array(Int64) +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +abc [1,2,3] String Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +\N 42 None Int64 +[1,2,3] 42 Array(Int64) Int64 +42 42 Int64 Int64 +43 42 Int64 Int64 +abc 42 String Int64 +42 43 Int64 Int64 +\N abc None String +[1,2,3] abc Array(Int64) String +42 abc Int64 String +abc abc String String +abd abc String String +abc abd String String +\N \N None None +[1,2,3] \N Array(Int64) None +42 \N Int64 None +abc \N String None +order by d2, d1 nulls last +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +abc [1,2,3] String Array(Int64) +\N [1,2,3] None Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +42 42 Int64 Int64 +43 42 Int64 Int64 +abc 42 String Int64 +\N 42 None Int64 +42 43 Int64 Int64 +[1,2,3] abc Array(Int64) String +42 abc Int64 String +abc abc String String +abd abc String String +\N abc None String +abc abd String String +[1,2,3] \N Array(Int64) None +42 \N Int64 None +abc \N String None +\N \N None None +d1 = d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) +[1,2,3] 42 0 Array(Int64) Int64 +[1,2,3] abc 0 Array(Int64) String +[1,2,3] \N 0 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 0 Int64 Int64 +42 abc 0 Int64 String +42 \N 0 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 1 String String +abc abd 0 String String +abc \N 0 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 1 None None +d1 < d2 +[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 0 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 0 String String +abc abd 1 String String +abc \N 1 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 0 None None +d1 <= d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 1 String String +abc abd 1 String 
String
+abc \N 1 String None
+abd abc 0 String String
+\N [1,2,3] 0 None Array(Int64)
+\N 42 0 None Int64
+\N abc 0 None String
+\N \N 1 None None
+d1 > d2
+[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64)
+[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64)
+[1,2,3] 42 0 Array(Int64) Int64
+[1,2,3] abc 0 Array(Int64) String
+[1,2,3] \N 0 Array(Int64) None
+[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64)
+42 [1,2,3] 1 Int64 Array(Int64)
+42 42 0 Int64 Int64
+42 43 0 Int64 Int64
+42 abc 0 Int64 String
+42 \N 0 Int64 None
+43 42 1 Int64 Int64
+abc [1,2,3] 1 String Array(Int64)
+abc 42 1 String Int64
+abc abc 0 String String
+abc abd 0 String String
+abc \N 0 String None
+abd abc 1 String String
+\N [1,2,3] 1 None Array(Int64)
+\N 42 1 None Int64
+\N abc 1 None String
+\N \N 0 None None
+d1 >= d2
+[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64)
+[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64)
+[1,2,3] 42 0 Array(Int64) Int64
+[1,2,3] abc 0 Array(Int64) String
+[1,2,3] \N 0 Array(Int64) None
+[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64)
+42 [1,2,3] 1 Int64 Array(Int64)
+42 42 1 Int64 Int64
+42 43 0 Int64 Int64
+42 abc 0 Int64 String
+42 \N 0 Int64 None
+43 42 1 Int64 Int64
+abc [1,2,3] 1 String Array(Int64)
+abc 42 1 String Int64
+abc abc 1 String String
+abc abd 0 String String
+abc \N 0 String None
+abd abc 1 String String
+\N [1,2,3] 1 None Array(Int64)
+\N 42 1 None Int64
+\N abc 1 None String
+\N \N 1 None None
diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.sql b/tests/queries/0_stateless/03035_dynamic_sorting.sql
new file mode 100644
index 00000000000..0487fafc955
--- /dev/null
+++ b/tests/queries/0_stateless/03035_dynamic_sorting.sql
@@ -0,0 +1,80 @@
+set allow_experimental_dynamic_type = 1;
+
+drop table if exists test;
+create table test (d1 Dynamic, d2 Dynamic) engine=Memory;
+
+insert into test values (42, 42);
+insert into test values (42, 43);
+insert into test values (43, 42);
+
+insert into test values ('abc', 'abc');
+insert into test values ('abc', 'abd');
+insert into test values ('abd', 'abc');
+
+insert into test values ([1,2,3], [1,2,3]);
+insert into test values ([1,2,3], [1,2,4]);
+insert into test values ([1,2,4], [1,2,3]);
+
+insert into test values (NULL, NULL);
+
+insert into test values (42, 'abc');
+insert into test values ('abc', 42);
+
+insert into test values (42, [1,2,3]);
+insert into test values ([1,2,3], 42);
+
+insert into test values (42, NULL);
+insert into test values (NULL, 42);
+
+insert into test values ('abc', [1,2,3]);
+insert into test values ([1,2,3], 'abc');
+
+insert into test values ('abc', NULL);
+insert into test values (NULL, 'abc');
+
+insert into test values ([1,2,3], NULL);
+insert into test values (NULL, [1,2,3]);
+
+
+select 'order by d1 nulls first';
+select d1, dynamicType(d1) from test order by d1 nulls first;
+
+select 'order by d1 nulls last';
+select d1, dynamicType(d1) from test order by d1 nulls last;
+
+select 'order by d2 nulls first';
+select d2, dynamicType(d2) from test order by d2 nulls first;
+
+select 'order by d2 nulls last';
+select d2, dynamicType(d2) from test order by d2 nulls last;
+
+
+select 'order by d1, d2 nulls first';
+select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls first;
+
+select 'order by d1, d2 nulls last';
+select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls last;
+
+select 'order by d2, d1 nulls first';
+select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls first;
+
+select 'order by d2, d1 nulls last';
+select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls last;
+
+select 'd1 = d2';
+select d1, d2, d1 = d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2;
+
+select 'd1 < d2';
+select d1, d2, d1 < d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2;
+
+select 'd1 <= d2';
+select d1, d2, d1 <= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2;
+
+select 'd1 > d2';
+select d1, d2, d1 > d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2;
+
+select 'd1 >= d2';
+select d1, d2, d1 >= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2;
+
+drop table test;
+
diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference
new file mode 100644
index 00000000000..36984bc8b9b
--- /dev/null
+++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference
@@ -0,0 +1,57 @@
+Memory
+test
+Array(Array(Dynamic))
+Array(Variant(String, UInt64))
+None
+String
+UInt64
+200000
+200000
+200000
+200000
+0
+0
+200000
+200000
+100000
+100000
+200000
+0
+MergeTree compact
+test
+Array(Array(Dynamic))
+Array(Variant(String, UInt64))
+None
+String
+UInt64
+200000
+200000
+200000
+200000
+0
+0
+200000
+200000
+100000
+100000
+200000
+0
+MergeTree wide
+test
+Array(Array(Dynamic))
+Array(Variant(String, UInt64))
+None
+String
+UInt64
+200000
+200000
+200000
+200000
+0
+0
+200000
+200000
+100000
+100000
+200000
+0
diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh
new file mode 100755
index 00000000000..65517061b99
--- /dev/null
+++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+# Tags: long
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# reset --log_comment
+CLICKHOUSE_LOG_COMMENT=
+# shellcheck source=../shell_config.sh
+.
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000" + + $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" + $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" + $CH_CLIENT -q "select count() from test where d.String is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" + $CH_CLIENT -q "select count() from test where d.Date is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" + $CH_CLIENT -q "select count() from test where d is NULL" + $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" + + $CH_CLIENT -q "select d, d.UInt64, d.String, d.\`Array(Variant(String, UInt64))\` from test format Null" + $CH_CLIENT -q "select d.UInt64, d.String, d.\`Array(Variant(String, UInt64))\` from test format Null" + $CH_CLIENT -q "select d.Int8, d.Date, d.\`Array(String)\` from test format Null" + $CH_CLIENT -q "select d, d.UInt64, d.Date, d.\`Array(Variant(String, UInt64))\`, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.UInt64, d.Date, d.\`Array(Variant(String, UInt64))\`, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64, d.\`Array(Variant(String, UInt64))\`.String from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select 
d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.reference b/tests/queries/0_stateless/03037_dynamic_merges_1.reference new file mode 100644 index 00000000000..fff812f0396 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1.reference @@ -0,0 +1,120 @@ +MergeTree compact + horizontal merge +test1 +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 UInt64 +100000 None +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide + horizontal merge +test1 +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 UInt64 +100000 None +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 UInt64 +100000 None +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree compact + vertical merge +test1 +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 UInt64 +100000 None +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 UInt64 +100000 None +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide + vertical merge +test1 +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.sh b/tests/queries/0_stateless/03037_dynamic_merges_1.sh new file mode 100755 index 00000000000..cf524fb9393 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# 
reset --log_comment
+CLICKHOUSE_LOG_COMMENT=
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1"
+
+
+function test()
+{
+    echo "test1"
+    $CH_CLIENT -q "system stop merges test"
+    $CH_CLIENT -q "insert into test select number, number from numbers(100000)"
+    $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)"
+    $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)"
+    $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)"
+    $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)"
+    $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)"
+
+    $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()"
+    $CH_CLIENT -nm -q "system start merges test; optimize table test final;"
+    $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()"
+
+    $CH_CLIENT -q "system stop merges test"
+    $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)"
+    $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()"
+    $CH_CLIENT -nm -q "system start merges test; optimize table test final;"
+    $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()"
+
+    $CH_CLIENT -q "system stop merges test"
+    $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)"
+    $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()"
+    $CH_CLIENT -nm -q "system start merges test; optimize table test final;"
+    $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()"
+}
+
+$CH_CLIENT -q "drop table if exists test;"
+
+echo "MergeTree compact + horizontal merge"
+$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;"
+test
+$CH_CLIENT -q "drop table test;"
+
+echo "MergeTree wide + horizontal merge"
+$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;"
+test
+$CH_CLIENT -q "drop table test;"
+
+echo "MergeTree compact + vertical merge"
+$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;"
+test
+$CH_CLIENT -q "drop table test;"
+
+echo "MergeTree wide + vertical merge"
+$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;"
+test
+$CH_CLIENT -q "drop table test;"
diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.sh b/tests/queries/0_stateless/03037_dynamic_merges_2.sh
new file mode 100755
index 00000000000..e9d571c2104
--- /dev/null
+++ b/tests/queries/0_stateless/03037_dynamic_merges_2.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Tags: long
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# reset --log_comment
+CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(1000000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000)" + + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference new file mode 100644 index 00000000000..f8118ce8b95 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference @@ -0,0 +1,92 @@ +MergeTree compact + horizontal merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +50000 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree wide + horizontal merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +50000 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a 
Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree compact + vertical merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree wide + vertical merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh new file mode 100755 index 00000000000..afb167ec20d --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + + $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference new file mode 100644 index 00000000000..a7fbbabcd46 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference @@ -0,0 +1,88 @@ +MergeTree compact + horizontal merge +ReplacingMergeTree +100000 UInt64 +100000 String +50000 UInt64 +100000 String +SummingMergeTree +100000 UInt64 +100000 String +200000 1 +50000 String +100000 UInt64 
+50000 2 +100000 1 +AggregatingMergeTree +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +MergeTree wide + horizontal merge +ReplacingMergeTree +100000 UInt64 +100000 String +50000 UInt64 +100000 String +SummingMergeTree +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +AggregatingMergeTree +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +MergeTree compact + vertical merge +ReplacingMergeTree +100000 String +100000 UInt64 +50000 UInt64 +100000 String +SummingMergeTree +100000 UInt64 +100000 String +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +AggregatingMergeTree +100000 UInt64 +100000 String +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +MergeTree wide + vertical merge +ReplacingMergeTree +100000 UInt64 +100000 String +50000 UInt64 +100000 String +SummingMergeTree +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +AggregatingMergeTree +100000 UInt64 +100000 String +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh new file mode 100755 index 00000000000..3384a135307 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "ReplacingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=ReplacingMergeTree order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "drop table test" + + echo "SummingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, sum UInt64, d Dynamic) engine=SummingMergeTree(sum) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), sum from test group by sum" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), sum from test group by sum" + $CH_CLIENT -q "drop table test" + + echo "AggregatingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, sum AggregateFunction(sum, UInt64), d Dynamic) engine=AggregatingMergeTree() order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" 
+ $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference new file mode 100644 index 00000000000..03c8b4564fa --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference @@ -0,0 +1,44 @@ +MergeTree compact + horizontal merge +CollapsingMergeTree +100000 String +100000 UInt64 +50000 UInt64 +50000 String +VersionedCollapsingMergeTree +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + horizontal merge +CollapsingMergeTree +100000 UInt64 +100000 String +50000 String +50000 UInt64 +VersionedCollapsingMergeTree +100000 UInt64 +100000 String +75000 String +75000 UInt64 +MergeTree compact + vertical merge +CollapsingMergeTree +100000 UInt64 +100000 String +50000 UInt64 +50000 String +VersionedCollapsingMergeTree +100000 UInt64 +100000 String +75000 UInt64 +75000 String +MergeTree wide + vertical merge +CollapsingMergeTree +100000 UInt64 +100000 String +50000 String +50000 UInt64 +VersionedCollapsingMergeTree +100000 UInt64 +100000 String +75000 UInt64 +75000 String diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh new file mode 100755 index 00000000000..5dae9228d0a --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "CollapsingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, sign Int8, d Dynamic) engine=CollapsingMergeTree(sign) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "drop table test" + + echo "VersionedCollapsingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, sign Int8, version UInt8, d Dynamic) engine=VersionedCollapsingMergeTree(sign, version) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.reference b/tests/queries/0_stateless/03040_dynamic_type_alters.reference new file mode 100644 index 00000000000..ca98ec0963c --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters.reference @@ -0,0 +1,526 @@ +Memory +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N 
\N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +4 UInt64 +7 String +8 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +5 UInt64 +8 String +9 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +5 UInt64 +8 String +9 None +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +5 UInt64 +8 String +12 None +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N +MergeTree compact +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N 
+alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N +MergeTree wide +initial insert +alter add column 1 
+3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 
13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.sh b/tests/queries/0_stateless/03040_dynamic_type_alters.sh new file mode 100755 index 00000000000..a20a92712e0 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --stacktrace --max_insert_threads 3 --group_by_two_level_threshold 1000000 --group_by_two_level_threshold_bytes 42526602 --distributed_aggregation_memory_efficient 1 --fsync_metadata 1 --output_format_parallel_formatting 0 --input_format_parallel_parsing 0 --min_chunk_bytes_for_parallel_parsing 8125230 --max_read_buffer_size 859505 --prefer_localhost_replica 1 --max_block_size 34577 --max_threads 41 --optimize_append_index 0 --optimize_if_chain_to_multiif 1 --optimize_if_transform_strings_to_enum 1 --optimize_read_in_order 1 --optimize_or_like_chain 0 --optimize_substitute_columns 1 --enable_multiple_prewhere_read_steps 1 --read_in_order_two_level_merge_threshold 99 --optimize_aggregation_in_order 1 --aggregation_in_order_max_block_bytes 27635208 --use_uncompressed_cache 0 --min_bytes_to_use_direct_io 10737418240 --min_bytes_to_use_mmap_io 6451111320 --local_filesystem_read_method pread --remote_filesystem_read_method read --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 50 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 --throw_on_error_from_cache_on_write_operations 0 --remote_filesystem_read_prefetch 1 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 64Mi --filesystem_prefetches_limit 10 --filesystem_prefetch_min_bytes_for_single_read_task 16Mi --filesystem_prefetch_step_marks 0 --filesystem_prefetch_step_bytes 100Mi --compile_aggregate_expressions 0 --compile_sort_description 1 --merge_tree_coarse_index_granularity 32 --optimize_distinct_in_order 0 --max_bytes_before_external_sort 10737418240 --max_bytes_before_external_group_by 10737418240 --max_bytes_before_remerge_sort 1374192967 --min_compress_block_size 2152247 --max_compress_block_size 1830907 --merge_tree_compact_parts_min_granules_to_multibuffer_read 79 --optimize_sorting_by_input_stream_properties 1 --http_response_buffer_size 106072 --http_wait_end_of_query True --enable_memory_bound_merging_of_aggregation_results 0 --min_count_to_compile_expression 0 --min_count_to_compile_aggregate_expression 3 --min_count_to_compile_sort_description 3 --session_timezone Africa/Khartoum --prefer_warmed_unmerged_parts_seconds 4 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.03 --ratio_of_defaults_for_sparse_serialization 0.9779014012142565 --prefer_fetch_merged_part_size_threshold 4254002758 
--vertical_merge_algorithm_min_rows_to_activate 1 --vertical_merge_algorithm_min_columns_to_activate 1 --allow_vertical_merges_from_compact_to_wide_parts 1 --min_merge_bytes_to_use_direct_io 1 --index_granularity_bytes 4982992 --merge_max_block_size 16662 --index_granularity 22872 --min_bytes_for_wide_part 1073741824 --compress_marks 0 --compress_primary_key 0 --marks_compress_block_size 86328 --primary_key_compress_block_size 64101 --replace_long_file_name_to_hash 0 --max_file_name_length 81 --min_bytes_for_full_part_storage 536870912 --compact_parts_max_bytes_to_buffer 480908080 --compact_parts_max_granules_to_buffer 1 --compact_parts_merge_max_bytes_to_prefetch_part 4535313 --cache_populated_by_fetch 0" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column 1" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 1" + $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 1" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 2" + $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 2" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 3" + $CH_CLIENT 
-q "alter table test modify column y Dynamic settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 3" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=Memory" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" From 18e4c0f1da79fc458707c5557b9e611a1fe916bd Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 26 Apr 2024 13:35:18 +0200 Subject: [PATCH 098/651] Fix remaining integration test --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 4 ++-- src/IO/S3/getObjectInfo.cpp | 2 +- .../ObjectStorage/HDFS/ReadBufferFromHDFS.cpp | 1 - .../ObjectStorage/ReadBufferIterator.cpp | 4 ++-- .../ObjectStorage/StorageObjectStorageSource.cpp | 16 +++++++++++----- .../ObjectStorage/StorageObjectStorageSource.h | 7 ++----- 6 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a2522212f90..507e9dbafcb 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -447,7 +447,7 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata result; result.size_bytes = object_info.size; - result.last_modified = object_info.last_modification_time; + result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time); result.attributes = object_info.metadata; return result; @@ -462,7 +462,7 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons ObjectMetadata result; result.size_bytes = object_info.size; - result.last_modified = object_info.last_modification_time; + result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time); result.attributes = object_info.metadata; return result; diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index 88f79f8d8d5..c294e7905bd 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -53,7 +53,7 @@ namespace const auto & result = outcome.GetResult(); ObjectInfo object_info; object_info.size = static_cast(result.GetContentLength()); - object_info.last_modification_time = result.GetLastModified().Millis() / 1000; + object_info.last_modification_time = result.GetLastModified().Seconds(); if (with_metadata) object_info.metadata = result.GetMetadata(); diff --git 
a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index eeb553e0d62..b37b9de746b 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -116,7 +116,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory= file_size) // { - // LOG_TEST(log, "KSSENII 1 2"); // return false; // } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index f8ce90a2b1f..9c1d3f79c2b 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -67,11 +67,11 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( auto get_last_mod_time = [&] -> std::optional { if (object_info->metadata) - return object_info->metadata->last_modified.epochMicroseconds(); + return object_info->metadata->last_modified.epochTime(); else { object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata->last_modified.epochMicroseconds(); + return object_info->metadata->last_modified.epochTime(); } }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 3101a7ebf51..4551c2df7c3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -76,6 +76,11 @@ StorageObjectStorageSource::~StorageObjectStorageSource() create_reader_pool->wait(); } +void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) +{ + setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); +} + std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, @@ -213,9 +218,11 @@ std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const O auto get_last_mod_time = [&]() -> std::optional { - return object_info->metadata - ? object_info->metadata->last_modified.epochMicroseconds() - : 0; + if (object_info->metadata) + { + return object_info->metadata->last_modified.epochTime(); + } + return std::nullopt; }; return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); } @@ -260,7 +267,6 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade const auto max_parsing_threads = need_only_count ? 
std::optional(1) : std::nullopt; read_buf = createReadBuffer(object_info->relative_path, object_info->metadata->size_bytes); - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII HEADER: {}", read_from_format_info.format_header.dumpStructure()); auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, getContext(), max_block_size, format_settings, max_parsing_threads, @@ -354,7 +360,7 @@ ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) if (object_info) { - LOG_TEST(&Poco::Logger::get("KeysIterator"), "Next key: {}", object_info->relative_path); + LOG_TEST(logger, "Next key: {}", object_info->relative_path); } return object_info; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 3c2cc3f80cd..0afbf77db2b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -38,10 +38,7 @@ public: String getName() const override { return name; } - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override - { - setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); - } + void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; Chunk generate() override; @@ -65,11 +62,11 @@ protected: const bool need_only_count; const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; + ColumnsDescription columns_desc; std::shared_ptr file_iterator; SchemaCache & schema_cache; bool initialized = false; - size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); From a4ed164074fcd96fc198000722563da70f6a31bf Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 26 Apr 2024 13:38:38 +0200 Subject: [PATCH 099/651] Fix clang tidy --- src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 2 +- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index c6590ba8d43..571e14325bb 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -136,7 +136,7 @@ struct DeltaLakeMetadata::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile(const String & key, std::set & result) + void processMetadataFile(const String & key, std::set & result) const { auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(key), read_settings); diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index f5bfb9d2a65..c5565d8b0e8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -61,7 +61,7 @@ StorageObjectStorage::StorageObjectStorage( objects.emplace_back(key); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); - setInMemoryMetadata(std::move(metadata)); + setInMemoryMetadata(metadata); } String StorageObjectStorage::getName() const diff --git 
a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp
index f98fc32a3cc..1a1df399626 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp
@@ -47,7 +47,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster(
     metadata.setConstraints(constraints_);
 
     setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns()));
-    setInMemoryMetadata(std::move(metadata));
+    setInMemoryMetadata(metadata);
 }
 
 std::string StorageObjectStorageCluster::getName() const

From 434d2d16f1056977dd80f47d0b687151ac9d16f2 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Fri, 26 Apr 2024 16:34:12 +0200
Subject: [PATCH 100/651] Cleanup

---
 src/Backups/BackupIO_AzureBlobStorage.cpp     |   4 +-
 src/Backups/BackupIO_AzureBlobStorage.h       |  10 +-
 .../registerBackupEngineAzureBlobStorage.cpp  |   4 +-
 src/CMakeLists.txt                            |   4 +-
 src/Core/Settings.h                           |   4 +
 src/Core/SettingsChangesHistory.h             |   4 +
 .../ObjectStorages/HDFS/HDFSObjectStorage.cpp |  78 +++----
 .../ObjectStorages/HDFS/HDFSObjectStorage.h   |  17 +-
 .../ObjectStorages/ObjectStorageFactory.cpp   |   3 +-
 .../ObjectStorages/S3/S3ObjectStorage.cpp     |  18 --
 src/Disks/ObjectStorages/S3/diskSettings.cpp  |  10 +-
 src/Interpreters/InterpreterSystemQuery.cpp   |   4 +-
 .../{AzureBlob => Azure}/Configuration.cpp    |  33 +--
 .../{AzureBlob => Azure}/Configuration.h      |  16 +-
 .../ObjectStorage/DataLakes/Common.cpp        |   4 +-
 src/Storages/ObjectStorage/DataLakes/Common.h |   4 +-
 .../DataLakes/DeltaLakeMetadata.cpp           |  12 +-
 .../DataLakes/DeltaLakeMetadata.h             |   5 +-
 .../ObjectStorage/DataLakes/HudiMetadata.h    |   4 +-
 .../DataLakes/IStorageDataLake.h              |   2 +-
 .../DataLakes/IcebergMetadata.cpp             |   6 +-
 .../ObjectStorage/DataLakes/IcebergMetadata.h |   4 +-
 .../DataLakes/registerDataLakeStorages.cpp    |   6 +-
 .../ObjectStorage/HDFS/Configuration.cpp      |  32 +--
 .../ObjectStorage/HDFS/Configuration.h        |  12 +-
 .../ObjectStorage/HDFS/ReadBufferFromHDFS.cpp |   8 +-
 .../ObjectStorage/ReadBufferIterator.cpp      |  53 ++---
 .../ObjectStorage/ReadBufferIterator.h        |   8 +-
 .../ReadFromObjectStorageStep.cpp             |  87 -------
 .../ObjectStorage/ReadFromObjectStorageStep.h |  55 -----
 .../ObjectStorage/S3/Configuration.cpp        |  21 +-
 src/Storages/ObjectStorage/S3/Configuration.h |  11 +-
 .../ObjectStorage/StorageObjectStorage.cpp    | 213 ++++++++++++++++--
 .../ObjectStorage/StorageObjectStorage.h      |  62 ++++-
 .../StorageObjectStorageCluster.cpp           |  20 +-
 .../StorageObjectStorageCluster.h             |  15 +-
 .../StorageObjectStorageConfiguration.cpp     |  74 ------
 .../StorageObjectStorageConfiguration.h       |  75 ------
 .../StorageObjectStorageSink.cpp              |   7 +-
 .../ObjectStorage/StorageObjectStorageSink.h  |  16 +-
 .../StorageObjectStorageSource.cpp            |  23 +-
 .../StorageObjectStorageSource.h              |   7 +-
 .../StorageObjectStorage_fwd_internal.h       |  12 -
 src/Storages/ObjectStorage/Utils.cpp          |   7 +-
 src/Storages/ObjectStorage/Utils.h            |   6 +-
 .../registerStorageObjectStorage.cpp          |  22 +-
 src/Storages/S3Queue/S3QueueTableMetadata.cpp |   3 +-
 src/Storages/S3Queue/S3QueueTableMetadata.h   |   4 +-
 src/Storages/S3Queue/StorageS3Queue.cpp       |   2 +-
 .../StorageSystemSchemaInferenceCache.cpp     |   4 +-
 src/TableFunctions/ITableFunctionDataLake.h   |   2 +-
 .../TableFunctionObjectStorage.cpp            |  73 ++----
 .../TableFunctionObjectStorage.h              |  33 ++-
 .../TableFunctionObjectStorageCluster.cpp     |   4 +-
 .../TableFunctionObjectStorageCluster.h       |   4 +-
 src/TableFunctions/registerTableFunctions.cpp |  12 -
 .../configs/inf_s3_retries.xml                |   1 +
 .../configs/s3_retries.xml                    |   1 +
 58 files
changed, 555 insertions(+), 690 deletions(-) rename src/Storages/ObjectStorage/{AzureBlob => Azure}/Configuration.cpp (93%) rename src/Storages/ObjectStorage/{AzureBlob => Azure}/Configuration.h (78%) delete mode 100644 src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp delete mode 100644 src/Storages/ObjectStorage/ReadFromObjectStorageStep.h delete mode 100644 src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp delete mode 100644 src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h delete mode 100644 src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 673930b5976..f00da686c18 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -28,7 +28,7 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - const StorageAzureBlobConfiguration & configuration_, + const StorageAzureConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) @@ -112,7 +112,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - const StorageAzureBlobConfiguration & configuration_, + const StorageAzureConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 25c52f9b0d3..4643c103fd5 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB @@ -17,7 +17,7 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: BackupReaderAzureBlobStorage( - const StorageAzureBlobConfiguration & configuration_, + const StorageAzureConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); @@ -39,7 +39,7 @@ public: private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlobConfiguration configuration; + StorageAzureConfiguration configuration; std::unique_ptr object_storage; std::shared_ptr settings; }; @@ -48,7 +48,7 @@ class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: BackupWriterAzureBlobStorage( - const StorageAzureBlobConfiguration & configuration_, + const StorageAzureConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, @@ -85,7 +85,7 @@ private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlobConfiguration configuration; + StorageAzureConfiguration configuration; std::unique_ptr object_storage; std::shared_ptr settings; }; diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 049a4b1a338..1e3b3759257 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #endif @@ -49,7 +49,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - 
StorageAzureBlobConfiguration configuration; + StorageAzureConfiguration configuration; if (!id_arg.empty()) { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c26c40d4b87..d5d17f992dc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -103,7 +103,6 @@ add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhous add_headers_and_sources(dbms Disks/IO) add_headers_and_sources(dbms Disks/ObjectStorages) -add_headers_and_sources(dbms Disks/ObjectStorages) if (TARGET ch_contrib::sqlite) add_headers_and_sources(dbms Databases/SQLite) endif() @@ -117,7 +116,7 @@ if (TARGET ch_contrib::nats_io) endif() add_headers_and_sources(dbms Storages/ObjectStorage) -add_headers_and_sources(dbms Storages/ObjectStorage/AzureBlob) +add_headers_and_sources(dbms Storages/ObjectStorage/Azure) add_headers_and_sources(dbms Storages/ObjectStorage/S3) add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes) @@ -148,7 +147,6 @@ if (TARGET ch_contrib::azure_sdk) endif() if (TARGET ch_contrib::hdfs) - add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) add_headers_and_sources(dbms Disks/ObjectStorages/HDFS) endif() diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ff7a9089327..bf558d7b1ba 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -113,9 +113,12 @@ class IColumn; M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ + M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ + M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageS3", 0) \ M(Bool, hdfs_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageAzure", 0) \ + M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. 
Fail if a single TCP read or write call blocks for this long.", 0) \ @@ -128,6 +131,7 @@ class IColumn; M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in hdfs engine tables", 0) \ M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ M(Bool, hdfs_skip_empty_files, false, "Allow to skip empty files in hdfs table engine", 0) \ + M(Bool, azure_skip_empty_files, false, "Allow to skip empty files in azure table engine", 0) \ M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index cfe3c290d83..4954fa5d996 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -90,6 +90,10 @@ static std::map sett {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, + {"hdfs_throw_on_zero_files_match", false, false, "Throw an error when ListObjects request cannot match any files"}, + {"azure_throw_on_zero_files_match", false, false, "Throw an error when ListObjects request cannot match any files"}, + {"s3_validate_request_settings", true, true, "Validate S3 request settings"}, + {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index ed63795cb05..6c2f310a7d1 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -23,15 +23,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -void HDFSObjectStorage::shutdown() -{ -} - -void HDFSObjectStorage::startup() -{ -} - -void HDFSObjectStorage::initializeHDFS() const +void HDFSObjectStorage::initializeHDFSFS() const { if (initialized) return; @@ -45,9 +37,25 @@ void HDFSObjectStorage::initializeHDFS() const initialized = true; } +std::string HDFSObjectStorage::extractObjectKeyFromURL(const StoredObject & object) const +{ + /// This is very unfortunate, but for disk HDFS we made a mistake + /// and now its behaviour is inconsistent with S3 and Azure disks. + /// The mistake is that for HDFS we write the whole URL + data directory + key into metadata files, + /// while for S3 and Azure we write there only data_directory + key. + /// This leads to an ambiguity: for StorageHDFS, object.remote_path contains just the key, + /// but for DiskHDFS it contains the URL as well.
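+ /// A hypothetical illustration (the host and paths below are made up, not taken from any real metadata file): + /// DiskHDFS: object.remote_path == "hdfs://namenode:9000/clickhouse/data/key" + /// StorageHDFS: object.remote_path == "clickhouse/data/key" + /// Stripping the URL prefix (and any leading '/') below reduces both forms to the same relative key.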
+ auto path = object.remote_path; + if (path.starts_with(url)) + path = path.substr(url.size()); + if (path.starts_with("/")) + path = path.substr(1); + return path; +} + ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { - initializeHDFS(); + initializeHDFSFS(); /// whatever data_source_description.description value is, consider that key as relative key chassert(data_directory.starts_with("/")); return ObjectStorageKey::createAsRelative( @@ -56,14 +64,11 @@ ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & bool HDFSObjectStorage::exists(const StoredObject & object) const { - initializeHDFS(); + initializeHDFSFS(); std::string path = object.remote_path; if (path.starts_with(url_without_path)) path = path.substr(url_without_path.size()); - // const auto & path = object.remote_path; - // const size_t begin_of_path = path.find('/', path.find("//") + 2); - // const String remote_fs_object_path = path.substr(begin_of_path); return (0 == hdfsExists(hdfs_fs.get(), path.c_str())); } @@ -73,13 +78,8 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLIN std::optional, std::optional) const { - initializeHDFS(); - std::string path = object.remote_path; - if (path.starts_with(url)) - path = path.substr(url.size()); - if (path.starts_with("/")) - path.substr(1); - + initializeHDFSFS(); + auto path = extractObjectKeyFromURL(object); return std::make_unique( fs::path(url_without_path) / "", fs::path(data_directory) / path, config, patchSettings(read_settings)); } @@ -90,21 +90,13 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI std::optional, std::optional) const { - initializeHDFS(); + initializeHDFSFS(); auto disk_read_settings = patchSettings(read_settings); auto read_buffer_creator = [this, disk_read_settings] (bool /* restricted_seek */, const StoredObject & object_) -> std::unique_ptr { - // size_t begin_of_path = path.find('/', path.find("//") + 2); - // auto hdfs_path = path.substr(begin_of_path); - // auto hdfs_uri = path.substr(0, begin_of_path); - - std::string path = object_.remote_path; - if (path.starts_with(url)) - path = path.substr(url.size()); - if (path.starts_with("/")) - path.substr(1); + auto path = extractObjectKeyFromURL(object_); return std::make_unique( fs::path(url_without_path) / "", fs::path(data_directory) / path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); }; @@ -120,7 +112,7 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL size_t buf_size, const WriteSettings & write_settings) { - initializeHDFS(); + initializeHDFSFS(); if (attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, @@ -142,7 +134,7 @@ /// Remove file. Throws exception if file doesn't exist or it's a directory.
void HDFSObjectStorage::removeObject(const StoredObject & object) { - initializeHDFS(); + initializeHDFSFS(); auto path = object.remote_path; if (path.starts_with(url_without_path)) path = path.substr(url_without_path.size()); @@ -156,28 +148,28 @@ void HDFSObjectStorage::removeObject(const StoredObject & object) void HDFSObjectStorage::removeObjects(const StoredObjects & objects) { - initializeHDFS(); + initializeHDFSFS(); for (const auto & object : objects) removeObject(object); } void HDFSObjectStorage::removeObjectIfExists(const StoredObject & object) { - initializeHDFS(); + initializeHDFSFS(); if (exists(object)) removeObject(object); } void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { - initializeHDFS(); + initializeHDFSFS(); for (const auto & object : objects) removeObjectIfExists(object); } ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const { - initializeHDFS(); + initializeHDFSFS(); auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); if (!file_info) throw Exception(ErrorCodes::HDFS_ERROR, @@ -185,7 +177,7 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co ObjectMetadata metadata; metadata.size_bytes = static_cast(file_info->mSize); - metadata.last_modified = file_info->mLastMod; + metadata.last_modified = Poco::Timestamp::fromEpochTime(file_info->mLastMod); hdfsFreeFileInfo(file_info, 1); return metadata; @@ -193,9 +185,9 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { - initializeHDFS(); + initializeHDFSFS(); auto * log = &Poco::Logger::get("HDFSObjectStorage"); - LOG_TRACE(log, "Trying to list files for {}", path); + LOG_TEST(log, "Trying to list files for {}", path); HDFSFileInfo ls; ls.file_info = hdfsListDirectory(hdfs_fs.get(), path.data(), &ls.length); @@ -213,7 +205,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); } - LOG_TRACE(log, "Listed {} files for {}", ls.length, path); + LOG_TEST(log, "Listed {} files for {}", ls.length, path); for (int i = 0; i < ls.length; ++i) { @@ -228,8 +220,6 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM } else { - LOG_TEST(log, "Found file: {}", file_path); - children.emplace_back(std::make_shared( String(file_path), ObjectMetadata{ @@ -247,7 +237,7 @@ void HDFSObjectStorage::copyObject( /// NOLINT const WriteSettings & write_settings, std::optional object_to_attributes) { - initializeHDFS(); + initializeHDFSFS(); if (object_to_attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index b626d3dc779..e747b283400 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -35,7 +35,8 @@ public: HDFSObjectStorage( const String & hdfs_root_path_, SettingsPtr settings_, - const Poco::Util::AbstractConfiguration & config_) + const Poco::Util::AbstractConfiguration & config_, + bool lazy_initialize) : config(config_) , settings(std::move(settings_)) { @@ -46,6 +47,9 @@ public: data_directory = url.substr(begin_of_path); else data_directory = "/"; + + if (!lazy_initialize) + initializeHDFSFS(); } std::string getName() const override { return 
"HDFSObjectStorage"; } @@ -98,10 +102,6 @@ public: void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; - void shutdown() override; - - void startup() override; - String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage( @@ -114,8 +114,13 @@ public: bool isRemote() const override { return true; } + void startup() override { } + + void shutdown() override { } + private: - void initializeHDFS() const; + void initializeHDFSFS() const; + std::string extractObjectKeyFromURL(const StoredObject & object) const; const Poco::Util::AbstractConfiguration & config; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 67e38d6389a..1a2ea0c2593 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -232,7 +232,8 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) context->getSettingsRef().hdfs_replication ); - return createObjectStorage(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config); + return createObjectStorage( + ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config, /* lazy_initialize */false); }); } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 507e9dbafcb..0801a84ce13 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -582,27 +582,9 @@ void S3ObjectStorage::applyNewSettings( auto new_client = getClient(config, config_prefix, context, *new_s3_settings, for_disk_s3, &uri); client.set(std::move(new_client)); } - s3_settings.set(std::move(new_s3_settings)); } -// void S3ObjectStorage::applyNewSettings(ContextPtr context) -// { -// auto settings = s3_settings.get(); -// if (!endpoint_settings || !settings->auth_settings.hasUpdates(endpoint_settings->auth_settings)) -// return; -// -// const auto & config = context->getConfigRef(); -// auto new_s3_settings = getSettings(uri, config, "s3.", context); -// -// new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); -// -// auto new_client = getClient(config, "s3.", context, *new_s3_settings, false); -// -// s3_settings.set(std::move(new_s3_settings)); -// client.set(std::move(new_client)); -// } - std::unique_ptr S3ObjectStorage::cloneObjectStorage( const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 49300a9cd89..a38c0d3c85f 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -100,11 +100,9 @@ std::unique_ptr getClient( settings.request_settings.put_request_throttler, url.uri.getScheme()); - client_configuration.endpointOverride = url.endpoint; - client_configuration.maxConnections = static_cast(request_settings.max_connections); client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + ".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value); client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value); - client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); + client_configuration.maxConnections = config.getUInt(config_prefix 
+ ".max_connections", static_cast(request_settings.max_connections)); client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); @@ -112,12 +110,6 @@ std::unique_ptr getClient( client_configuration.s3_use_adaptive_timeouts = config.getBool( config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); - // client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000); - // client_configuration.http_connection_pool_size = config.getUInt( - // config_prefix + ".http_connection_pool_size", static_cast(global_settings.s3_http_connection_pool_size.value)); - // client_configuration.s3_use_adaptive_timeouts = config.getBool(config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); - // client_configuration.wait_on_pool_size_limit = for_disk_s3; - if (for_disk_s3) { /* diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index af9dc08e8c7..56b2904363e 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include #include @@ -502,7 +502,7 @@ BlockIO InterpreterSystemQuery::execute() StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageObjectStorage::getSchemaCache(getContext(), StorageAzureBlobConfiguration::type_name).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageAzureConfiguration::type_name).clear(); #endif break; } diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp similarity index 93% rename from src/Storages/ObjectStorage/AzureBlob/Configuration.cpp rename to src/Storages/ObjectStorage/Azure/Configuration.cpp index f268b812c03..43992a81eef 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -1,8 +1,9 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -65,7 +66,7 @@ namespace } } -void StorageAzureBlobConfiguration::check(ContextPtr context) const +void StorageAzureConfiguration::check(ContextPtr context) const { Poco::URI url_to_check; if (is_connection_string) @@ -77,11 +78,11 @@ void StorageAzureBlobConfiguration::check(ContextPtr context) const url_to_check = Poco::URI(connection_url); context->getGlobalContext()->getRemoteHostFilter().checkURL(url_to_check); - StorageObjectStorageConfiguration::check(context); + Configuration::check(context); } -StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) - : StorageObjectStorageConfiguration(other) +StorageAzureConfiguration::StorageAzureConfiguration(const StorageAzureConfiguration & other) + : Configuration(other) { connection_url = other.connection_url; is_connection_string = other.is_connection_string; @@ -92,7 +93,7 @@ StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureB blobs_paths = other.blobs_paths; } -AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) 
+AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(ContextPtr context) { const auto & context_settings = context->getSettingsRef(); auto settings_ptr = std::make_unique(); @@ -102,7 +103,7 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co return settings_ptr; } -StorageObjectStorage::QuerySettings StorageAzureBlobConfiguration::getQuerySettings(const ContextPtr & context) const +StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings(const ContextPtr & context) const { const auto & settings = context->getSettingsRef(); return StorageObjectStorage::QuerySettings{ @@ -110,14 +111,14 @@ StorageObjectStorage::QuerySettings StorageAzureBlobConfiguration::getQuerySetti .create_new_file_on_insert = settings.azure_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure + .skip_empty_files = settings.azure_skip_empty_files, .list_object_keys_size = settings.azure_list_object_keys_size, - .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .throw_on_zero_files_match = settings.azure_throw_on_zero_files_match, .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, }; } -ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); auto client = createClient(is_readonly, /* attempt_to_create_container */true); @@ -125,7 +126,7 @@ ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr c return std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), container); } -AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) +AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) { using namespace Azure::Storage::Blobs; @@ -133,8 +134,8 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only, bo if (is_connection_string) { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); + auto managed_identity_credential = std::make_shared(); + auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); if (attempt_to_create_container) @@ -243,7 +244,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only, bo return result; } -void StorageAzureBlobConfiguration::fromNamedCollection(const NamedCollection & collection) +void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); @@ -275,7 +276,7 @@ void StorageAzureBlobConfiguration::fromNamedCollection(const NamedCollection & blobs_paths = {blob_path}; } -void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) +void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) { if (engine_args.size() 
< 3 || engine_args.size() > (with_structure ? 8 : 7)) { @@ -396,7 +397,7 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte blobs_paths = {blob_path}; } -void StorageAzureBlobConfiguration::addStructureAndFormatToArgs( +void StorageAzureConfiguration::addStructureAndFormatToArgs( ASTs & args, const String & structure_, const String & format_, ContextPtr context) { if (tryGetNamedCollectionWithOverrides(args, context)) diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h similarity index 78% rename from src/Storages/ObjectStorage/AzureBlob/Configuration.h rename to src/Storages/ObjectStorage/Azure/Configuration.h index 7e105ea82b5..91a9a0bbbd5 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -5,24 +5,27 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include +#include namespace DB { class BackupFactory; -class StorageAzureBlobConfiguration : public StorageObjectStorageConfiguration +class StorageAzureConfiguration : public StorageObjectStorage::Configuration { friend class BackupReaderAzureBlobStorage; friend class BackupWriterAzureBlobStorage; friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type_name = "azure"; static constexpr auto engine_name = "Azure"; - StorageAzureBlobConfiguration() = default; - StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other); + StorageAzureConfiguration() = default; + StorageAzureConfiguration(const StorageAzureConfiguration & other); std::string getTypeName() const override { return type_name; } std::string getEngineName() const override { return engine_name; } @@ -31,16 +34,15 @@ public: void setPath(const Path & path) override { blob_path = path; } const Paths & getPaths() const override { return blobs_paths; } - Paths & getPaths() override { return blobs_paths; } void setPaths(const Paths & paths) override { blobs_paths = paths; } - String getDataSourceDescription() override { return fs::path(connection_url) / container; } + String getDataSourceDescription() override { return std::filesystem::path(connection_url) / container; } String getNamespace() const override { return container; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT - StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + ConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; diff --git a/src/Storages/ObjectStorage/DataLakes/Common.cpp b/src/Storages/ObjectStorage/DataLakes/Common.cpp index 0c9237127b9..4830cc52a90 100644 --- a/src/Storages/ObjectStorage/DataLakes/Common.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Common.cpp @@ -1,6 +1,6 @@ #include "Common.h" #include -#include +#include #include namespace DB @@ -8,7 +8,7 @@ namespace DB std::vector listFiles( const IObjectStorage & object_storage, - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const String & prefix, const String & suffix) { auto key = 
std::filesystem::path(configuration.getPath()) / prefix; diff --git a/src/Storages/ObjectStorage/DataLakes/Common.h b/src/Storages/ObjectStorage/DataLakes/Common.h index ae3767f2eec..db3afa9e4a6 100644 --- a/src/Storages/ObjectStorage/DataLakes/Common.h +++ b/src/Storages/ObjectStorage/DataLakes/Common.h @@ -1,15 +1,15 @@ #pragma once #include +#include namespace DB { class IObjectStorage; -class StorageObjectStorageConfiguration; std::vector listFiles( const IObjectStorage & object_storage, - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const String & prefix, const String & suffix); } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 571e14325bb..277d07d88ef 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -85,7 +85,7 @@ struct DeltaLakeMetadata::Impl while (true) { const auto filename = withPadding(++current_version) + metadata_file_suffix; - const auto file_path = fs::path(configuration->getPath()) / deltalake_metadata_directory / filename; + const auto file_path = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / filename; if (!object_storage->exists(StoredObject(file_path))) break; @@ -161,12 +161,12 @@ struct DeltaLakeMetadata::Impl if (json.has("add")) { const auto path = json["add"]["path"].getString(); - result.insert(fs::path(configuration->getPath()) / path); + result.insert(std::filesystem::path(configuration->getPath()) / path); } else if (json.has("remove")) { const auto path = json["remove"]["path"].getString(); - result.erase(fs::path(configuration->getPath()) / path); + result.erase(std::filesystem::path(configuration->getPath()) / path); } } } @@ -186,7 +186,7 @@ struct DeltaLakeMetadata::Impl */ size_t readLastCheckpointIfExists() const { - const auto last_checkpoint_file = fs::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; + const auto last_checkpoint_file = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; if (!object_storage->exists(StoredObject(last_checkpoint_file))) return 0; @@ -249,7 +249,7 @@ struct DeltaLakeMetadata::Impl return 0; const auto checkpoint_filename = withPadding(version) + ".checkpoint.parquet"; - const auto checkpoint_path = fs::path(configuration->getPath()) / deltalake_metadata_directory / checkpoint_filename; + const auto checkpoint_path = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / checkpoint_filename; LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); @@ -311,7 +311,7 @@ struct DeltaLakeMetadata::Impl if (filename.empty()) continue; LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(fs::path(configuration->getPath()) / filename); + const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); if (!inserted) throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index 5050b88d809..e527721b29e 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ 
-12,8 +12,7 @@ namespace DB class DeltaLakeMetadata final : public IDataLakeMetadata { public: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; - + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; static constexpr auto name = "DeltaLake"; DeltaLakeMetadata( diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index 6054c3f15d6..3ab274b1fbf 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -13,7 +13,7 @@ namespace DB class HudiMetadata final : public IDataLakeMetadata, private WithContext { public: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; static constexpr auto name = "Hudi"; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 144cc16939c..3119b844aaf 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -88,7 +88,7 @@ public: else { ConfigurationPtr configuration = base_configuration->clone(); - configuration->getPaths() = metadata->getDataFiles(); + configuration->setPaths(metadata->getDataFiles()); return Storage::resolveSchemaFromData( object_storage_, configuration, format_settings_, local_context); } diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp index 8ee6f002ca6..591e5ef03f6 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp @@ -45,7 +45,7 @@ namespace ErrorCodes IcebergMetadata::IcebergMetadata( ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + ConfigurationPtr configuration_, DB::ContextPtr context_, Int32 metadata_version_, Int32 format_version_, @@ -341,7 +341,7 @@ MutableColumns parseAvro( */ std::pair getMetadataFileAndVersion( ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration) + const StorageObjectStorage::Configuration & configuration) { const auto metadata_files = listFiles(*object_storage, configuration, "metadata", ".metadata.json"); if (metadata_files.empty()) @@ -378,7 +378,7 @@ std::pair getMetadataFileAndVersion( DataLakeMetadataPtr IcebergMetadata::create( ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, + ConfigurationPtr configuration, ContextPtr local_context) { const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration); diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index f88e3eecc67..06dbd373bf9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include namespace DB @@ -61,7 +61,7 @@ namespace DB class IcebergMetadata : public IDataLakeMetadata, private WithContext { public: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; static constexpr auto name = "Iceberg"; diff --git a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp 
b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp index a5170e5ed6b..0fa6402e892 100644 --- a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp +++ b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp @@ -20,7 +20,7 @@ void registerStorageIceberg(StorageFactory & factory) [&](const StorageFactory::Arguments & args) { auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageIceberg::create( configuration, args.getContext(), args.table_id, args.columns, @@ -43,7 +43,7 @@ void registerStorageDeltaLake(StorageFactory & factory) [&](const StorageFactory::Arguments & args) { auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageDeltaLake::create( configuration, args.getContext(), args.table_id, args.columns, @@ -64,7 +64,7 @@ void registerStorageHudi(StorageFactory & factory) [&](const StorageFactory::Arguments & args) { auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageHudi::create( configuration, args.getContext(), args.table_id, args.columns, diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 12e3f3adb12..a8a9ab5b557 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -1,18 +1,21 @@ #include #if USE_HDFS -#include -#include -#include +#include #include -#include -#include #include +#include + +#include +#include + +#include +#include +#include + #include #include #include -#include - namespace DB { @@ -23,7 +26,7 @@ namespace ErrorCodes } StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) - : StorageObjectStorageConfiguration(other) + : Configuration(other) { url = other.url; path = other.path; @@ -34,7 +37,7 @@ void StorageHDFSConfiguration::check(ContextPtr context) const { context->getRemoteHostFilter().checkURL(Poco::URI(url)); checkHDFSURL(fs::path(url) / path.substr(1)); - StorageObjectStorageConfiguration::check(context); + Configuration::check(context); } ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT @@ -47,10 +50,11 @@ ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT settings.remote_read_min_bytes_for_seek, settings.hdfs_replication ); - return std::make_shared(url, std::move(hdfs_settings), context->getConfigRef()); + return std::make_shared( + url, std::move(hdfs_settings), context->getConfigRef(), /* lazy_initialize */true); } -std::string StorageHDFSConfiguration::getPathWithoutGlob() const +std::string StorageHDFSConfiguration::getPathWithoutGlobs() const { /// Unlike s3 and azure, which are object storages, /// hdfs is a filesystem, so it cannot list files by partial prefix, @@ -69,9 +73,9 @@ StorageObjectStorage::QuerySettings StorageHDFSConfiguration::getQuerySettings(c .create_new_file_on_insert =
settings.hdfs_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs - .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs - .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .skip_empty_files = settings.hdfs_skip_empty_files, + .list_object_keys_size = 0, /// HDFS does not support listing in batches. + .throw_on_zero_files_match = settings.hdfs_throw_on_zero_files_match, .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, }; } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 0a502857153..cac09ee1d92 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -2,17 +2,18 @@ #include "config.h" #if USE_HDFS -#include +#include #include -#include #include namespace DB { -class StorageHDFSConfiguration : public StorageObjectStorageConfiguration +class StorageHDFSConfiguration : public StorageObjectStorage::Configuration { public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type_name = "hdfs"; static constexpr auto engine_name = "HDFS"; @@ -26,7 +27,6 @@ public: void setPath(const Path & path_) override { path = path_; } const Paths & getPaths() const override { return paths; } - Paths & getPaths() override { return paths; } void setPaths(const Paths & paths_) override { paths = paths_; } String getNamespace() const override { return ""; } @@ -35,12 +35,12 @@ public: void check(ContextPtr context) const override; ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT - StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + ConfigurationPtr clone() override { return std::make_shared(*this); } void addStructureAndFormatToArgs( ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; - std::string getPathWithoutGlob() const override; + std::string getPathWithoutGlobs() const override; private: void fromNamedCollection(const NamedCollection &) override; diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index b37b9de746b..be339d021dc 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -114,10 +114,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<SeekableReadBuffer> - // if (file_size != 0 && file_offset >= file_size) - // { - // return false; - // } + if (file_size != 0 && file_offset >= file_size) + { + return false; + } ResourceGuard rlock(read_settings.resource_link, num_bytes_to_read); int bytes_read; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 9c1d3f79c2b..3705725ffe1 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -10,7 +10,6 @@ namespace ErrorCodes { extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; extern const int CANNOT_DETECT_FORMAT; - } ReadBufferIterator::ReadBufferIterator( @@ -29,18 +28,19 @@ ReadBufferIterator::ReadBufferIterator( , query_settings(configuration->getQuerySettings(context_)) , schema_cache(schema_cache_) , read_keys(read_keys_) - ,
format(configuration->format == "auto" ? std::nullopt : std::optional(configuration->format)) , prev_read_keys_size(read_keys_.size()) { + if (configuration->format != "auto") + format = configuration->format; } SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path, const String & format_name) const { - auto source = fs::path(configuration->getDataSourceDescription()) / path; + auto source = std::filesystem::path(configuration->getDataSourceDescription()) / path; return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); } -SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const +SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const { Strings sources; sources.reserve(read_keys.size()); @@ -49,7 +49,7 @@ SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const std::back_inserter(sources), [&](const auto & elem) { - return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; + return std::filesystem::path(configuration->getDataSourceDescription()) / elem->relative_path; }); return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); } @@ -66,16 +66,14 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( const auto & object_info = (*it); auto get_last_mod_time = [&] -> std::optional { - if (object_info->metadata) - return object_info->metadata->last_modified.epochTime(); - else - { - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata->last_modified.epochTime(); - } + if (!object_info->metadata) + object_info->metadata = object_storage->tryGetObjectMetadata(object_info->relative_path); + + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; }; - chassert(object_info); if (format) { auto cache_key = getKeyForSchemaCache(object_info->relative_path, *format); @@ -105,14 +103,12 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) { - chassert(current_object_info); if (query_settings.schema_inference_use_cache) schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path, *format), num_rows); } void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) { - chassert(current_object_info); if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) { @@ -125,7 +121,7 @@ void ReadBufferIterator::setResultingSchema(const ColumnsDescription & columns) if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) { - schema_cache.addManyColumns(getPathsForSchemaCache(), columns); + schema_cache.addManyColumns(getKeysForSchemaCache(), columns); } } @@ -144,15 +140,11 @@ String ReadBufferIterator::getLastFileName() const std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() { - chassert(current_object_info); - - auto impl = object_storage->readObject( - StoredObject(current_object_info->relative_path), getContext()->getReadSettings()); - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return wrapReadBufferWithCompressionMethod( - std::move(impl), chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), - zstd_window_log_max); + auto context = getContext(); + auto impl = 
object_storage->readObject(StoredObject(current_object_info->relative_path), context->getReadSettings()); + const auto compression_method = chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method); + const auto zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); + return wrapReadBufferWithCompressionMethod(std::move(impl), compression_method, zstd_window_log_max); } ReadBufferIterator::Data ReadBufferIterator::next() @@ -190,16 +182,21 @@ ReadBufferIterator::Data ReadBufferIterator::next() if (first) { if (format.has_value()) + { throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "The table structure cannot be extracted from a {} format file, " + "because there are no files with provided path " "in {} or all files are empty. You can specify table structure manually", *format, object_storage->getName()); + } throw Exception( ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in {} or all files are empty. You can specify the format manually", object_storage->getName()); + "The data format cannot be detected by the contents of the files, " + "because there are no files with provided path " + "in {} or all files are empty. You can specify the format manually", + object_storage->getName()); } return {nullptr, std::nullopt, format}; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 2d58e1c789e..287e316e243 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -1,8 +1,7 @@ #pragma once #include -#include -#include #include +#include namespace DB @@ -12,6 +11,9 @@ class ReadBufferIterator : public IReadBufferIterator, WithContext { public: using FileIterator = std::shared_ptr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + using ObjectInfos = StorageObjectStorage::ObjectInfos; ReadBufferIterator( ObjectStoragePtr object_storage_, @@ -40,7 +42,7 @@ public: private: SchemaCache::Key getKeyForSchemaCache(const String & path, const String & format_name) const; - SchemaCache::Keys getPathsForSchemaCache() const; + SchemaCache::Keys getKeysForSchemaCache() const; std::optional tryGetColumnsFromCache( const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); diff --git a/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp deleted file mode 100644 index f19e01cdc3e..00000000000 --- a/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -ReadFromObjectStorageStep::ReadFromObjectStorageStep( - ObjectStoragePtr object_storage_, - ConfigurationPtr configuration_, - const String & name_, - const Names & columns_to_read, - const NamesAndTypesList & virtual_columns_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const std::optional & format_settings_, - bool distributed_processing_, - ReadFromFormatInfo info_, - SchemaCache & schema_cache_, - const bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = info_.source_header}, 
columns_to_read, query_info_, storage_snapshot_, context_) - , object_storage(object_storage_) - , configuration(configuration_) - , info(std::move(info_)) - , virtual_columns(virtual_columns_) - , format_settings(format_settings_) - , query_settings(configuration->getQuerySettings(context_)) - , schema_cache(schema_cache_) - , name(name_ + "Source") - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - , distributed_processing(distributed_processing_) -{ -} - -void ReadFromObjectStorageStep::createIterator(const ActionsDAG::Node * predicate) -{ - if (!iterator_wrapper) - { - auto context = getContext(); - iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, distributed_processing, - context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromObjectStorageStep::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void ReadFromObjectStorageStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - auto context = getContext(); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - getName(), object_storage, configuration, info, format_settings, query_settings, - context, max_block_size, iterator_wrapper, need_only_count, schema_cache); - - source->setKeyCondition(filter_actions_dag, context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -} diff --git a/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h deleted file mode 100644 index d98ebfef1f2..00000000000 --- a/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ - -class ReadFromObjectStorageStep : public SourceStepWithFilter -{ -public: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; - - ReadFromObjectStorageStep( - ObjectStoragePtr object_storage_, - ConfigurationPtr configuration_, - const String & name_, - const Names & columns_to_read, - const NamesAndTypesList & virtual_columns_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const std::optional & format_settings_, - bool distributed_processing_, - ReadFromFormatInfo info_, - SchemaCache & schema_cache_, - bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_); - - std::string getName() const override { return name; } - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - -private: - ObjectStoragePtr object_storage; - ConfigurationPtr configuration; - std::shared_ptr iterator_wrapper; - - const ReadFromFormatInfo info; - const NamesAndTypesList virtual_columns; - const std::optional format_settings; - const StorageObjectStorage::QuerySettings query_settings; - SchemaCache & 
schema_cache; - const String name; - const bool need_only_count; - const size_t max_block_size; - const size_t num_streams; - const bool distributed_processing; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -} diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index bfd61c647f8..9fcbc6a6816 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -1,17 +1,23 @@ #include #if USE_AWS_S3 - #include +#include #include + +#include #include -#include + #include #include + #include #include #include +#include +#include + namespace DB { namespace ErrorCodes @@ -46,7 +52,7 @@ static const std::unordered_set optional_configuration_keys = String StorageS3Configuration::getDataSourceDescription() { - return fs::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; + return std::filesystem::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; } void StorageS3Configuration::check(ContextPtr context) const @@ -54,7 +60,7 @@ void StorageS3Configuration::check(ContextPtr context) const validateNamespace(url.bucket); context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); - StorageObjectStorageConfiguration::check(context); + Configuration::check(context); } void StorageS3Configuration::validateNamespace(const String & name) const @@ -63,7 +69,7 @@ void StorageS3Configuration::validateNamespace(const String & name) const } StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) - : StorageObjectStorageConfiguration(other) + : Configuration(other) { url = other.url; static_configuration = other.static_configuration; @@ -91,11 +97,12 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, assertInitialized(); const auto & config = context->getConfigRef(); + const auto & settings = context->getSettingsRef(); const std::string config_prefix = "s3."; - auto s3_settings = getSettings(config, config_prefix, context, false); /// FIXME: add a setting + auto s3_settings = getSettings(config, config_prefix, context, settings.s3_validate_request_settings); - request_settings.updateFromSettingsIfChanged(context->getSettingsRef()); + request_settings.updateFromSettingsIfChanged(settings); auth_settings.updateFrom(s3_settings->auth_settings); s3_settings->auth_settings = auth_settings; diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index de4a6d17579..9eb724c4a64 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -4,17 +4,17 @@ #if USE_AWS_S3 -#include #include -#include -#include +#include namespace DB { -class StorageS3Configuration : public StorageObjectStorageConfiguration +class StorageS3Configuration : public StorageObjectStorage::Configuration { public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type_name = "s3"; StorageS3Configuration() = default; @@ -27,7 +27,6 @@ public: void setPath(const Path & path) override { url.key = path; } const Paths & getPaths() const override { return keys; } - Paths & getPaths() override { return keys; } void setPaths(const Paths & paths) override { keys = paths; } String getNamespace() const override { return url.bucket; } @@ -37,7 +36,7 @@ public: void check(ContextPtr context) const 
override; void validateNamespace(const String & name) const override; - StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + ConfigurationPtr clone() override { return std::make_shared(*this); } bool isStaticConfiguration() const override { return static_configuration; } ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index c5565d8b0e8..2c9831f0d29 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -2,21 +2,25 @@ #include #include -#include #include +#include + +#include #include +#include +#include #include #include -#include + #include +#include #include -#include +#include +#include +#include #include #include -#include #include -#include -#include namespace DB @@ -26,6 +30,7 @@ namespace ErrorCodes { extern const int DATABASE_ACCESS_DENIED; extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } StorageObjectStorage::StorageObjectStorage( @@ -90,6 +95,110 @@ void StorageObjectStorage::updateConfiguration(ContextPtr context) object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); } +namespace +{ +class ReadFromObjectStorageStep : public SourceStepWithFilter +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + ReadFromObjectStorageStep( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const String & name_, + const Names & columns_to_read, + const NamesAndTypesList & virtual_columns_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const std::optional & format_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + SchemaCache & schema_cache_, + const bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_) + : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) + , object_storage(object_storage_) + , configuration(configuration_) + , schema_cache(schema_cache_) + , info(std::move(info_)) + , virtual_columns(virtual_columns_) + , format_settings(format_settings_) + , query_settings(configuration->getQuerySettings(context_)) + , name(name_ + "Source") + , need_only_count(need_only_count_) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + , distributed_processing(distributed_processing_) + { + } + + std::string getName() const override { return name; } + + void applyFilters(ActionDAGNodes added_filter_nodes) override + { + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + createIterator(predicate); + } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + createIterator(nullptr); + Pipes pipes; + auto context = getContext(); + + for (size_t i = 0; i < num_streams; ++i) + { + auto source = std::make_shared( + getName(), object_storage, configuration, info, format_settings, query_settings, + context, max_block_size, iterator_wrapper, need_only_count, schema_cache); + + source->setKeyCondition(filter_actions_dag, context); + pipes.emplace_back(std::move(source)); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + if 
(pipe.empty()) + pipe = Pipe(std::make_shared(info.source_header)); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); + } + +private: + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + std::shared_ptr iterator_wrapper; + SchemaCache & schema_cache; + + const ReadFromFormatInfo info; + const NamesAndTypesList virtual_columns; + const std::optional format_settings; + const StorageObjectStorage::QuerySettings query_settings; + const String name; + const bool need_only_count; + const size_t max_block_size; + const size_t num_streams; + const bool distributed_processing; + + void createIterator(const ActionsDAG::Node * predicate) + { + if (iterator_wrapper) + return; + auto context = getContext(); + iterator_wrapper = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, distributed_processing, + context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); + } +}; +} + void StorageObjectStorage::read( QueryPlan & query_plan, const Names & column_names, @@ -123,7 +232,7 @@ void StorageObjectStorage::read( storage_snapshot, format_settings, distributed_processing, - std::move(read_from_format_info), + read_from_format_info, getSchemaCache(local_context), need_only_count, local_context, @@ -169,12 +278,13 @@ SinkToStoragePtr StorageObjectStorage::write( getName(), configuration->getPath()); } - auto & paths = configuration->getPaths(); + auto paths = configuration->getPaths(); if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( *object_storage, *configuration, settings, paths.front(), paths.size())) { paths.push_back(*new_key); } + configuration->setPaths(paths); return std::make_shared( object_storage, @@ -185,10 +295,10 @@ SinkToStoragePtr StorageObjectStorage::write( } void StorageObjectStorage::truncate( - const ASTPtr &, - const StorageMetadataPtr &, - ContextPtr, - TableExclusiveLockHolder &) + const ASTPtr & /* query */, + const StorageMetadataPtr & /* metadata_snapshot */, + ContextPtr /* context */, + TableExclusiveLockHolder & /* table_holder */) { if (configuration->withGlobs()) { @@ -233,10 +343,8 @@ ColumnsDescription StorageObjectStorage::resolveSchemaFromData( const ContextPtr & context) { ObjectInfos read_keys; - auto read_buffer_iterator = createReadBufferIterator( - object_storage, configuration, format_settings, read_keys, context); - return readSchemaFromFormat( - configuration->format, format_settings, *read_buffer_iterator, context); + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); } std::string StorageObjectStorage::resolveFormatFromData( @@ -246,10 +354,8 @@ std::string StorageObjectStorage::resolveFormatFromData( const ContextPtr & context) { ObjectInfos read_keys; - auto read_buffer_iterator = createReadBufferIterator( - object_storage, configuration, format_settings, read_keys, context); - return detectFormatAndReadSchema( - format_settings, *read_buffer_iterator, context).second; + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + return detectFormatAndReadSchema(format_settings, *iterator, context).second; } std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( @@ -259,10 +365,8 @@ std::pair StorageObjectStorage::resolveSchemaAn const ContextPtr & context) { ObjectInfos read_keys; - auto 
read_buffer_iterator = createReadBufferIterator( - object_storage, configuration, format_settings, read_keys, context); - - auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); configuration->format = format; return std::pair(columns, format); } @@ -302,4 +406,65 @@ SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, c throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name); } +void StorageObjectStorage::Configuration::initialize( + Configuration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); + + // FIXME: it should be - if (format == "auto" && get_format_from_file) + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); + else + FormatFactory::instance().checkFormatName(configuration.format); + + configuration.initialized = true; +} + +void StorageObjectStorage::Configuration::check(ContextPtr) const +{ + FormatFactory::instance().checkFormatName(format); +} + +StorageObjectStorage::Configuration::Configuration(const Configuration & other) +{ + format = other.format; + compression_method = other.compression_method; + structure = other.structure; +} + +bool StorageObjectStorage::Configuration::withWildcard() const +{ + static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos + || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; +} + +bool StorageObjectStorage::Configuration::isPathWithGlobs() const +{ + return getPath().find_first_of("*?{") != std::string::npos; +} + +bool StorageObjectStorage::Configuration::isNamespaceWithGlobs() const +{ + return getNamespace().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorage::Configuration::getPathWithoutGlobs() const +{ + return getPath().substr(0, getPath().find_first_of("*?{")); +} + +void StorageObjectStorage::Configuration::assertInitialized() const +{ + if (!initialized) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration was not initialized before usage"); + } +} } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index d46a875bf42..46d422b26c2 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -2,15 +2,16 @@ #include #include #include +#include #include #include namespace DB { -class StorageObjectStorageConfiguration; class ReadBufferIterator; class SchemaCache; +class NamedCollection; /** * A general class containing implementation for external table engines @@ -20,7 +21,7 @@ class SchemaCache; class StorageObjectStorage : public IStorage { public: - using Configuration = StorageObjectStorageConfiguration; + class Configuration; using ConfigurationPtr = std::shared_ptr; using ObjectInfo = RelativePathWithMetadata; using ObjectInfoPtr = std::shared_ptr; @@ -134,4 +135,61 @@ protected: std::mutex 
configuration_update_mutex; }; +class StorageObjectStorage::Configuration +{ +public: + Configuration() = default; + Configuration(const Configuration & other); + virtual ~Configuration() = default; + + using Path = std::string; + using Paths = std::vector; + + static void initialize( + Configuration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure); + + virtual std::string getTypeName() const = 0; + virtual std::string getEngineName() const = 0; + + virtual Path getPath() const = 0; + virtual void setPath(const Path & path) = 0; + + virtual const Paths & getPaths() const = 0; + virtual void setPaths(const Paths & paths) = 0; + + virtual String getDataSourceDescription() = 0; + virtual String getNamespace() const = 0; + virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; + virtual void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; + + bool withWildcard() const; + bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } + bool isPathWithGlobs() const; + bool isNamespaceWithGlobs() const; + virtual std::string getPathWithoutGlobs() const; + + virtual void check(ContextPtr context) const; + virtual void validateNamespace(const String & /* name */) const {} + + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT + virtual ConfigurationPtr clone() = 0; + virtual bool isStaticConfiguration() const { return true; } + + String format = "auto"; + String compression_method = "auto"; + String structure = "auto"; + +protected: + virtual void fromNamedCollection(const NamedCollection & collection) = 0; + virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; + + void assertInitialized() const; + + bool initialized = false; +}; + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 1a1df399626..193894a1d44 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -1,21 +1,15 @@ #include "Storages/ObjectStorage/StorageObjectStorageCluster.h" -#include "config.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include + +#include #include +#include +#include + namespace DB { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 2db8f5c352e..b38eb722df5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -1,12 +1,10 @@ #pragma once -#include "config.h" - -#include +// #include #include #include #include -#include +// #include namespace DB { @@ -29,17 +27,14 @@ public: std::string getName() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension( - const ActionsDAG::Node * predicate, - const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } -private: - void updateBeforeRead(const ContextPtr & /* context */) override {} + RemoteQueryExecutor::Extension getTaskIteratorExtension( + const ActionsDAG::Node * predicate, const ContextPtr & context) const 
override; +private: void updateQueryToSendIfNeeded( ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp deleted file mode 100644 index 89c15085274..00000000000 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void StorageObjectStorageConfiguration::initialize( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); - else - configuration.fromAST(engine_args, local_context, with_table_structure); - - // FIXME: it should be - if (format == "auto" && get_format_from_file) - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); - else - FormatFactory::instance().checkFormatName(configuration.format); - - configuration.initialized = true; -} - -void StorageObjectStorageConfiguration::check(ContextPtr) const -{ - FormatFactory::instance().checkFormatName(format); -} - -StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other) -{ - format = other.format; - compression_method = other.compression_method; - structure = other.structure; -} - -bool StorageObjectStorageConfiguration::withWildcard() const -{ - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return getPath().find(PARTITION_ID_WILDCARD) != String::npos - || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; -} - -bool StorageObjectStorageConfiguration::isPathWithGlobs() const -{ - return getPath().find_first_of("*?{") != std::string::npos; -} - -bool StorageObjectStorageConfiguration::isNamespaceWithGlobs() const -{ - return getNamespace().find_first_of("*?{") != std::string::npos; -} - -std::string StorageObjectStorageConfiguration::getPathWithoutGlob() const -{ - return getPath().substr(0, getPath().find_first_of("*?{")); -} - -void StorageObjectStorageConfiguration::assertInitialized() const -{ - if (!initialized) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration was not initialized before usage"); - } -} - -} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h deleted file mode 100644 index c55362aa8bd..00000000000 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once -#include -#include -#include "StorageObjectStorage.h" -#include - -namespace fs = std::filesystem; - -namespace DB -{ - -class StorageObjectStorageConfiguration; -using StorageObjectStorageConfigurationPtr = std::shared_ptr; - -class StorageObjectStorageConfiguration -{ -public: - StorageObjectStorageConfiguration() = default; - StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other); - virtual ~StorageObjectStorageConfiguration() = default; - - using Path = std::string; - using Paths = std::vector; - - static void initialize( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure); - - 
virtual std::string getTypeName() const = 0; - virtual std::string getEngineName() const = 0; - - virtual Path getPath() const = 0; - virtual void setPath(const Path & path) = 0; - - virtual const Paths & getPaths() const = 0; - virtual Paths & getPaths() = 0; - virtual void setPaths(const Paths & paths) = 0; - - virtual String getDataSourceDescription() = 0; - virtual String getNamespace() const = 0; - virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; - virtual void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; - - bool withWildcard() const; - bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } - bool isPathWithGlobs() const; - bool isNamespaceWithGlobs() const; - virtual std::string getPathWithoutGlob() const; - - virtual void check(ContextPtr context) const; - virtual void validateNamespace(const String & /* name */) const {} - - virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT - virtual StorageObjectStorageConfigurationPtr clone() = 0; - virtual bool isStaticConfiguration() const { return true; } - - String format = "auto"; - String compression_method = "auto"; - String structure = "auto"; - -protected: - virtual void fromNamedCollection(const NamedCollection & collection) = 0; - virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; - - void assertInitialized() const; - - bool initialized = false; -}; - -using StorageObjectStorageConfigurationPtr = std::shared_ptr; - -} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 62367a6b933..81bdeaa43a3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -14,14 +14,13 @@ namespace ErrorCodes StorageObjectStorageSink::StorageObjectStorageSink( ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, + ConfigurationPtr configuration, std::optional format_settings_, const Block & sample_block_, ContextPtr context, const std::string & blob_path) : SinkToStorage(sample_block_) , sample_block(sample_block_) - , format_settings(format_settings_) { const auto & settings = context->getSettingsRef(); const auto path = blob_path.empty() ? 
configuration->getPaths().back() : blob_path; @@ -37,7 +36,7 @@ StorageObjectStorageSink::StorageObjectStorageSink( static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormatParallelIfPossible( - configuration->format, *write_buf, sample_block, context, format_settings); + configuration->format, *write_buf, sample_block, context, format_settings_); } void StorageObjectStorageSink::consume(Chunk chunk) @@ -102,7 +101,7 @@ void StorageObjectStorageSink::release() PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + ConfigurationPtr configuration_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 6c2f73e40e3..a3c8ef68cf0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,17 +1,18 @@ #pragma once #include -#include #include -#include +#include namespace DB { class StorageObjectStorageSink : public SinkToStorage { public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + StorageObjectStorageSink( ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, + ConfigurationPtr configuration, std::optional format_settings_, const Block & sample_block_, ContextPtr context, @@ -29,8 +30,6 @@ public: private: const Block sample_block; - const std::optional format_settings; - std::unique_ptr write_buf; OutputFormatPtr writer; bool cancelled = false; @@ -43,9 +42,11 @@ private: class PartitionedStorageObjectStorageSink : public PartitionedSink { public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + ConfigurationPtr configuration_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -58,7 +59,8 @@ private: void validateNamespace(const String & str); ObjectStoragePtr object_storage; - StorageObjectStorageConfigurationPtr configuration; + ConfigurationPtr configuration; + const StorageObjectStorage::QuerySettings query_settings; const std::optional format_settings; const Block sample_block; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 4551c2df7c3..b224afb7a58 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -9,10 +9,11 @@ #include #include #include -#include +#include #include #include +namespace fs = std::filesystem; namespace ProfileEvents { @@ -218,11 +219,9 @@ std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const O auto get_last_mod_time = [&]() -> std::optional { - if (object_info->metadata) - { - return object_info->metadata->last_modified.epochTime(); - } - return std::nullopt; + return object_info->metadata + ? 
std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; }; return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); } @@ -354,7 +353,7 @@ StorageObjectStorageSource::IIterator::IIterator(const std::string & logger_name { } -ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) { auto object_info = nextImpl(processor); @@ -392,7 +391,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( else if (configuration->isPathWithGlobs()) { const auto key_with_globs = configuration_->getPath(); - const auto key_prefix = configuration->getPathWithoutGlob(); + const auto key_prefix = configuration->getPathWithoutGlobs(); object_storage_iterator = object_storage->iterate(key_prefix, list_object_keys_size); matcher = std::make_unique(makeRegexpPatternFromGlobs(key_with_globs)); @@ -423,7 +422,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } -ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) { std::lock_guard lock(next_mutex); auto object_info = nextImplUnlocked(processor); @@ -439,7 +438,7 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processo return object_info; } -ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImplUnlocked(size_t /* processor */) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImplUnlocked(size_t /* processor */) { bool current_batch_processed = object_infos.empty() || index >= object_infos.size(); if (is_finished && current_batch_processed) @@ -533,7 +532,7 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( } } -ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) { while (true) { @@ -614,7 +613,7 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( } } -ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::nextImpl(size_t) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::nextImpl(size_t) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= buffer.size()) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 0afbf77db2b..356478422bc 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -3,8 +3,8 @@ #include #include #include -#include #include +#include namespace DB @@ -16,6 +16,11 @@ class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { friend class StorageS3QueueSource; public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + using ObjectInfo = StorageObjectStorage::ObjectInfo; + using ObjectInfos = StorageObjectStorage::ObjectInfos; + using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + class IIterator; class ReadTaskIterator; class GlobIterator; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h deleted file mode 100644 index 241e2f20962..00000000000 --- a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h +++ /dev/null @@ -1,12 +0,0 @@ 
-#pragma once -#include - -namespace DB -{ - -using ConfigurationPtr = StorageObjectStorageConfigurationPtr; -using ObjectInfo = RelativePathWithMetadata; -using ObjectInfoPtr = std::shared_ptr; -using ObjectInfos = std::vector; - -} diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index 2a7236ab196..bde3cb7e1cb 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { @@ -47,14 +47,15 @@ void resolveSchemaAndFormat( ColumnsDescription & columns, std::string & format, ObjectStoragePtr object_storage, - const StorageObjectStorageConfigurationPtr & configuration, + const StorageObjectStorage::ConfigurationPtr & configuration, std::optional format_settings, const ContextPtr & context) { if (columns.empty()) { if (format == "auto") - std::tie(columns, format) = StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + std::tie(columns, format) = + StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); else columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); } diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 3a752e6b8f0..2077999df41 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -1,14 +1,10 @@ #pragma once -#include #include "StorageObjectStorage.h" namespace DB { class IObjectStorage; -class StorageObjectStorageConfiguration; -using StorageObjectStorageConfigurationPtr = std::shared_ptr; -struct StorageObjectStorageSettings; std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, @@ -21,7 +17,7 @@ void resolveSchemaAndFormat( ColumnsDescription & columns, std::string & format, ObjectStoragePtr object_storage, - const StorageObjectStorageConfigurationPtr & configuration, + const StorageObjectStorage::ConfigurationPtr & configuration, std::optional format_settings, const ContextPtr & context); diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index 06b8aefb716..c23b180215e 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -1,8 +1,8 @@ -#include +#include #include #include #include -#include +#include #include #include @@ -18,13 +18,15 @@ namespace ErrorCodes static std::shared_ptr createStorageObjectStorage( const StorageFactory::Arguments & args, - typename StorageObjectStorage::ConfigurationPtr configuration, + StorageObjectStorage::ConfigurationPtr configuration, ContextPtr context) { auto & engine_args = args.engine_args; if (engine_args.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, context, false); + // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. 
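
// With Configuration::initialize() hoisted into createStorageObjectStorage() above,
// every register function below collapses to the same two-line pattern. A condensed
// sketch of the hunks that follow (S3 shown; Azure and HDFS are identical up to the
// configuration type, which is taken from the matching registerStorageS3Impl hunk):
//
//     factory.registerStorage("S3", [](const StorageFactory::Arguments & args)
//     {
//         auto configuration = std::make_shared<StorageS3Configuration>();
//         return createStorageObjectStorage(args, configuration, args.getLocalContext());
//     });
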
@@ -75,10 +77,8 @@ void registerStorageAzure(StorageFactory & factory) { factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) { - auto context = args.getLocalContext(); - auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, context); + auto configuration = std::make_shared(); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); }, { .supports_settings = true, @@ -94,10 +94,8 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) { factory.registerStorage(name, [=](const StorageFactory::Arguments & args) { - auto context = args.getLocalContext(); auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, context); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); }, { .supports_settings = true, @@ -129,10 +127,8 @@ void registerStorageHDFS(StorageFactory & factory) { factory.registerStorage("HDFS", [=](const StorageFactory::Arguments & args) { - auto context = args.getLocalContext(); auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, context); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); }, { .supports_settings = true, diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index 8354e6aa2ae..f0b7568ae7f 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -32,7 +33,7 @@ namespace S3QueueTableMetadata::S3QueueTableMetadata( - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 2158b189070..bb8f8ccf2c4 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -4,7 +4,7 @@ #include #include -#include +#include #include namespace DB @@ -29,7 +29,7 @@ struct S3QueueTableMetadata S3QueueTableMetadata() = default; S3QueueTableMetadata( - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index e84dabecf3b..38934a7895a 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -591,7 +591,7 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getContext(), false); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); // Use format settings from global 
server context + settings from // the SETTINGS clause of the create query. Settings from current diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index a2d3f342a63..b67a8b23e9d 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace DB { @@ -84,7 +84,7 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureBlobConfiguration::type_name), "Azure"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureConfiguration::type_name), "Azure"); #endif } diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 02c8c623e61..6ad8689a9b4 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 06676a8adfa..a997b34a75c 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -1,23 +1,23 @@ #include "config.h" +#include +#include +#include #include + #include +#include #include #include + #include -#include -#include -#include -#include -#include -#include + #include #include -#include -#include -#include -#include -#include "registerTableFunctions.h" +#include +#include +#include +#include namespace DB @@ -29,8 +29,7 @@ namespace ErrorCodes } template -ObjectStoragePtr TableFunctionObjectStorage< - Definition, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const +ObjectStoragePtr TableFunctionObjectStorage::getObjectStorage(const ContextPtr & context, bool create_readonly) const { if (!object_storage) object_storage = configuration->createObjectStorage(context, create_readonly); @@ -38,8 +37,7 @@ ObjectStoragePtr TableFunctionObjectStorage< } template -StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< - Definition, Configuration>::getConfiguration() const +StorageObjectStorage::ConfigurationPtr TableFunctionObjectStorage::getConfiguration() const { if (!configuration) configuration = std::make_shared(); @@ -47,8 +45,8 @@ StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< } template -std::vector TableFunctionObjectStorage< - Definition, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +std::vector TableFunctionObjectStorage::skipAnalysisForArguments( + const QueryTreeNodePtr & query_node_table_function, ContextPtr) const { auto & table_function_node = query_node_table_function->as(); auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); @@ -64,19 +62,6 @@ std::vector TableFunctionObjectStorage< return result; } -template -void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( - ASTs & args, const String & structure, const String & format, const ContextPtr & context) -{ - Configuration().addStructureAndFormatToArgs(args, structure, format, context); -} - -template -void 
TableFunctionObjectStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) -{ - StorageObjectStorageConfiguration::initialize(*getConfiguration(), engine_args, local_context, true); -} - template void TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) { @@ -94,32 +79,16 @@ template ColumnsDescription TableFunctionObjectStorage< Definition, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const { - chassert(configuration); if (configuration->structure == "auto") { context->checkAccess(getSourceAccessType()); - auto storage = getObjectStorage(context, !is_insert_query); ColumnsDescription columns; + auto storage = getObjectStorage(context, !is_insert_query); resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context); return columns; } - - return parseColumnsListFromString(configuration->structure, context); -} - -template -bool TableFunctionObjectStorage< - Definition, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - chassert(configuration); - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); -} - -template -std::unordered_set TableFunctionObjectStorage< - Definition, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); + else + return parseColumnsListFromString(configuration->structure, context); } template @@ -205,7 +174,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) #endif #if USE_AZURE_BLOB_STORAGE - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -229,8 +198,8 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) } #if USE_AZURE_BLOB_STORAGE -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; #endif #if USE_AWS_S3 diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index bd43cae3697..bbc40cc6191 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -1,19 +1,18 @@ #pragma once #include "config.h" - #include -#include -#include +#include #include - +#include +#include namespace DB { class Context; class StorageS3Configuration; -class StorageAzureBlobConfiguration; +class StorageAzureConfiguration; class StorageHDFSConfiguration; struct S3StorageSettings; struct AzureStorageSettings; @@ -104,20 +103,32 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override + { + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); + } - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); + } - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context) + { + StorageObjectStorage::Configuration::initialize(*getConfiguration(), args, context, true); + } 
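+    /// Definition moved here from TableFunctionObjectStorage.cpp, same as
+    /// parseArgumentsImpl() above: the body is a one-line forward to the
+    /// Configuration, so keeping it in the header drops two out-of-line
+    /// template definitions.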
static void updateStructureAndFormatArgumentsIfNeeded( ASTs & args, const String & structure, const String & format, - const ContextPtr & context); + const ContextPtr & context) + { + Configuration().addStructureAndFormatToArgs(args, structure, format, context); + } protected: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; StoragePtr executeImpl( const ASTPtr & ast_function, @@ -146,7 +157,7 @@ using TableFunctionS3 = TableFunctionObjectStorage; +using TableFunctionAzureBlob = TableFunctionObjectStorage; #endif #if USE_HDFS diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index ce78076dd21..449bd2c8c49 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB @@ -109,7 +109,7 @@ template class TableFunctionObjectStorageCluster; +template class TableFunctionObjectStorageCluster; #endif #if USE_HDFS diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h index a8bc11b5e40..76786fafe99 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.h +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -13,7 +13,7 @@ class Context; class StorageS3Settings; class StorageAzureBlobSettings; class StorageS3Configuration; -class StorageAzureBlobConfiguration; +class StorageAzureConfiguration; struct AzureClusterDefinition { @@ -90,7 +90,7 @@ using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; +using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; #endif #if USE_HDFS diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 5e0bc3267d8..26b9a771416 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -29,18 +29,6 @@ void registerTableFunctions() registerTableFunctionFuzzJSON(factory); #endif -#if USE_AWS_S3 - // registerTableFunctionS3Cluster(factory); - // registerTableFunctionHudi(factory); -#if USE_PARQUET - // registerTableFunctionDeltaLake(factory); -#endif -#if USE_AVRO - // registerTableFunctionIceberg(factory); -#endif - -#endif - #if USE_HIVE registerTableFunctionHive(factory); #endif diff --git a/tests/integration/test_checking_s3_blobs_paranoid/configs/inf_s3_retries.xml b/tests/integration/test_checking_s3_blobs_paranoid/configs/inf_s3_retries.xml index 4210c13b727..7df7b56b3b4 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/configs/inf_s3_retries.xml +++ b/tests/integration/test_checking_s3_blobs_paranoid/configs/inf_s3_retries.xml @@ -5,6 +5,7 @@ 1000000 1 + 0 diff --git a/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml b/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml index 95a313ea4f2..c1ca258f6c4 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml +++ b/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml @@ -5,6 +5,7 @@ 5 0 + 0 From 0db76bf631475c6a7647096baf26bfdac35cc181 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 26 Apr 2024 18:52:49 +0000 Subject: [PATCH 101/651] Add more tests and docs, fix collecting statistics, fix prefetching columns in wide parts --- src/Columns/ColumnDynamic.cpp | 4 +- 
src/Columns/ColumnNullable.cpp | 19 ++++ src/Columns/ColumnNullable.h | 3 + src/DataTypes/Serializations/ISerialization.h | 37 ++++---- .../Serializations/SerializationArray.cpp | 3 +- .../Serializations/SerializationDynamic.cpp | 88 ++++++++++--------- .../SerializationDynamicElement.cpp | 35 ++++++-- .../Serializations/SerializationMap.cpp | 3 +- .../Serializations/SerializationTuple.cpp | 41 ++++----- .../Serializations/SerializationVariant.cpp | 24 ++--- .../SerializationVariantElement.cpp | 84 ++++++++---------- .../MergeTree/MergeTreeReaderWide.cpp | 9 +- src/Storages/MergeTree/MutateTask.cpp | 16 +--- .../03034_dynamic_conversions.reference | 25 ++++++ .../0_stateless/03034_dynamic_conversions.sql | 10 +++ .../03037_dynamic_merges_1.reference | 18 ++-- .../0_stateless/03037_dynamic_merges_1.sh | 17 ++-- .../03037_dynamic_merges_2.reference | 20 +++++ .../0_stateless/03037_dynamic_merges_2.sh | 2 +- ... => 03040_dynamic_type_alters.sh.disabled} | 0 20 files changed, 275 insertions(+), 183 deletions(-) create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2.reference rename tests/queries/0_stateless/{03040_dynamic_type_alters.sh => 03040_dynamic_type_alters.sh.disabled} (100%) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 293055b43fc..3074504973a 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -687,7 +687,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & so } size_t size = source_statistics.data.empty() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : source_statistics.data.at(variant_name); -// LOG_DEBUG(getLogger("ColumnDynamic"), "Source variant: {}. Variant: {}. Size: {}", source_variant_info.variant_name, variant_name, size); + LOG_DEBUG(getLogger("ColumnDynamic"), "Source variant: {}. Variant: {}. Size: {}", source_variant_info.variant_name, variant_name, size); it->second += size; } } @@ -701,7 +701,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & so variants_with_sizes.reserve(all_variants.size()); for (const auto & variant : all_variants) { -// LOG_DEBUG(getLogger("ColumnDynamic"), "Variant: {}. Size: {}", variant->getName(), total_sizes[variant->getName()]); + LOG_DEBUG(getLogger("ColumnDynamic"), "Variant: {}. 
Size: {}", variant->getName(), total_sizes[variant->getName()]); variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant); } std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 4474816601e..011f3702bdf 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -900,4 +900,23 @@ ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column) return column; } +ColumnPtr removeNullable(const ColumnPtr & column) +{ + if (const auto * column_nullable = typeid_cast(column.get())) + return column_nullable->getNestedColumnPtr(); + return column; +} + +ColumnPtr removeNullableOrLowCardinalityNullable(const ColumnPtr & column) +{ + if (const auto * column_low_cardinality = typeid_cast(column.get())) + { + if (!column_low_cardinality->nestedIsNullable()) + return column; + return column_low_cardinality->cloneWithDefaultOnNull(); + } + + return removeNullable(column); +} + } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 73bd75527f8..4e6f05b35ec 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -210,4 +210,7 @@ ColumnPtr makeNullableSafe(const ColumnPtr & column); ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column); ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column); +ColumnPtr removeNullable(const ColumnPtr & column); +ColumnPtr removeNullableOrLowCardinalityNullable(const ColumnPtr & column); + } diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 65493cf6dda..ddbed34f614 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -99,6 +99,19 @@ public: using SubcolumnCreatorPtr = std::shared_ptr; + struct SerializeBinaryBulkState + { + virtual ~SerializeBinaryBulkState() = default; + }; + + struct DeserializeBinaryBulkState + { + virtual ~DeserializeBinaryBulkState() = default; + }; + + using SerializeBinaryBulkStatePtr = std::shared_ptr; + using DeserializeBinaryBulkStatePtr = std::shared_ptr; + struct SubstreamData { SubstreamData() = default; @@ -125,10 +138,17 @@ public: return *this; } + SubstreamData & withDeserializePrefix(DeserializeBinaryBulkStatePtr deserialize_prefix_state_) + { + deserialize_prefix_state = std::move(deserialize_prefix_state_); + return *this; + } + SerializationPtr serialization; DataTypePtr type; ColumnPtr column; SerializationInfoPtr serialization_info; + DeserializeBinaryBulkStatePtr deserialize_prefix_state; }; struct Substream @@ -221,21 +241,6 @@ public: using OutputStreamGetter = std::function; using InputStreamGetter = std::function; - struct SerializeBinaryBulkState - { - virtual ~SerializeBinaryBulkState() = default; - }; - - struct DeserializeBinaryBulkState - { - virtual ~DeserializeBinaryBulkState() = default; - }; - - using SerializeBinaryBulkStatePtr = std::shared_ptr; - using DeserializeBinaryBulkStatePtr = std::shared_ptr; - - using SubstreamsDeserializeStatesCache = std::unordered_map; - struct SerializeBinaryBulkSettings { OutputStreamGetter getter; @@ -285,6 +290,8 @@ public: SerializeBinaryBulkSettings & /*settings*/, SerializeBinaryBulkStatePtr & /*state*/) const {} + using SubstreamsDeserializeStatesCache = std::unordered_map; + /// Call before before deserializeBinaryBulkWithMultipleStreams chain to get DeserializeBinaryBulkStatePtr. 
virtual void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & /*settings*/, diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index d6546b338b5..6a8555a3714 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -254,7 +254,8 @@ void SerializationArray::enumerateStreams( auto next_data = SubstreamData(nested) .withType(type_array ? type_array->getNestedType() : nullptr) .withColumn(column_array ? column_array->getDataPtr() : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(data.deserialize_prefix_state); nested->enumerateStreams(settings, callback, next_data); settings.path.pop_back(); diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index c9fe8dd6b29..858445ed257 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -21,45 +21,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -void SerializationDynamic::enumerateStreams( - EnumerateStreamsSettings & settings, - const StreamCallback & callback, - const SubstreamData & data) const -{ - settings.path.push_back(Substream::DynamicStructure); - callback(settings.path); - settings.path.pop_back(); - - const auto * column_dynamic = data.column ? &assert_cast(*data.column) : nullptr; - - /// If column is nullptr, nothing to enumerate as we don't have any variants. - if (!column_dynamic) - return; - - const auto & variant_info = column_dynamic->getVariantInfo(); - auto variant_serialization = variant_info.variant_type->getDefaultSerialization(); - - settings.path.push_back(Substream::DynamicData); - auto variant_data = SubstreamData(variant_serialization) - .withType(variant_info.variant_type) - .withColumn(column_dynamic->getVariantColumnPtr()) - .withSerializationInfo(data.serialization_info); - settings.path.back().data = variant_data; - variant_serialization->enumerateStreams(settings, callback, variant_data); - settings.path.pop_back(); -} - -SerializationDynamic::DynamicStructureSerializationVersion::DynamicStructureSerializationVersion(UInt64 version) : value(static_cast(version)) -{ - checkVersion(version); -} - -void SerializationDynamic::DynamicStructureSerializationVersion::checkVersion(UInt64 version) -{ - if (version != VariantTypeName) - throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization."); -} - struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryBulkState { SerializationDynamic::DynamicStructureSerializationVersion structure_version; @@ -68,10 +29,6 @@ struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryB SerializationPtr variant_serialization; ISerialization::SerializeBinaryBulkStatePtr variant_state; - /// Pointer to currently serialized dynamic column. - /// Used to calculate statistics for the whole column and not for some range. - const ColumnDynamic * current_dynamic_column = nullptr; - /// Variants statistics. Map (Variant name) -> (Variant size). 
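    /// Filled in serializeBinaryBulkWithMultipleStreams() and later consumed by
    /// ColumnDynamic::takeDynamicStructureFromSourceColumns() to order variants
    /// by how many rows they actually hold.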
    ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ };
@@ -91,6 +48,47 @@ struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBin
     ISerialization::DeserializeBinaryBulkStatePtr structure_state;
 };
 
+void SerializationDynamic::enumerateStreams(
+    EnumerateStreamsSettings & settings,
+    const StreamCallback & callback,
+    const SubstreamData & data) const
+{
+    settings.path.push_back(Substream::DynamicStructure);
+    callback(settings.path);
+    settings.path.pop_back();
+
+    const auto * column_dynamic = data.column ? &assert_cast(*data.column) : nullptr;
+    const auto * deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr;
+
+    /// If column is nullptr and we didn't deserialize prefix yet, nothing to enumerate as we don't have any variants.
+    if (!column_dynamic && !deserialize_prefix_state)
+        return;
+
+    const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_prefix_state->structure_state)->variant_type;
+    auto variant_serialization = variant_type->getDefaultSerialization();
+
+    settings.path.push_back(Substream::DynamicData);
+    auto variant_data = SubstreamData(variant_serialization)
+                            .withType(variant_type)
+                            .withColumn(column_dynamic ? column_dynamic->getVariantColumnPtr() : nullptr)
+                            .withSerializationInfo(data.serialization_info)
+                            .withDeserializePrefix(deserialize_prefix_state ? deserialize_prefix_state->variant_state : nullptr);
+    settings.path.back().data = variant_data;
+    variant_serialization->enumerateStreams(settings, callback, variant_data);
+    settings.path.pop_back();
+}
+
+SerializationDynamic::DynamicStructureSerializationVersion::DynamicStructureSerializationVersion(UInt64 version) : value(static_cast(version))
+{
+    checkVersion(version);
+}
+
+void SerializationDynamic::DynamicStructureSerializationVersion::checkVersion(UInt64 version)
+{
+    if (version != VariantTypeName)
+        throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization.");
+}
+
 void SerializationDynamic::serializeBinaryBulkStatePrefix(
     const DB::IColumn & column,
     SerializeBinaryBulkSettings & settings,
@@ -245,6 +243,10 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreams(
         if (!variant_info.variant_type->equals(*dynamic_state->variant_type))
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName());
 
+        /// Update statistics.
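+        /// Only on the first serialized range (offset == 0), so the counters cover
+        /// the whole column once instead of being bumped again for every range.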
+ if (offset == 0) + dynamic_state->updateStatistics(*variant_column); + settings.path.push_back(Substream::DynamicData); dynamic_state->variant_serialization->serializeBinaryBulkWithMultipleStreams(*variant_column, offset, limit, settings, dynamic_state->variant_state); settings.path.pop_back(); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 386a6579519..9be9802d926 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -14,17 +14,41 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } + +struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::DeserializeBinaryBulkState +{ + ISerialization::DeserializeBinaryBulkStatePtr structure_state; + SerializationPtr variant_serialization; + ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; +}; + void SerializationDynamicElement::enumerateStreams( DB::ISerialization::EnumerateStreamsSettings & settings, const DB::ISerialization::StreamCallback & callback, - const DB::ISerialization::SubstreamData &) const + const DB::ISerialization::SubstreamData & data) const { settings.path.push_back(Substream::DynamicStructure); callback(settings.path); settings.path.pop_back(); - /// We don't know if we have actually have this variant in Dynamic column, + /// If we didn't deserialize prefix yet, we don't know if we actually have this variant in Dynamic column, /// so we cannot enumerate variant streams. + if (!data.deserialize_prefix_state) + return; + + auto * deserialize_prefix_state = checkAndGetState(data.deserialize_prefix_state); + /// If we don't have this variant, no need to enumerate streams for it as we won't read from any stream. 
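+    /// (variant_serialization is set in deserializeBinaryBulkStatePrefix() only when
+    /// the requested variant is present in the serialized Dynamic structure.)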
+ if (!deserialize_prefix_state->variant_serialization) + return; + + settings.path.push_back(Substream::DynamicData); + auto variant_data = SubstreamData(deserialize_prefix_state->variant_serialization) + .withType(data.type) + .withColumn(data.column) + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(deserialize_prefix_state->variant_element_state); + deserialize_prefix_state->variant_serialization->enumerateStreams(settings, callback, variant_data); + settings.path.pop_back(); } void SerializationDynamicElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const @@ -39,13 +63,6 @@ void SerializationDynamicElement::serializeBinaryBulkStateSuffix(SerializeBinary ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationDynamicElement"); } -struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::DeserializeBinaryBulkState -{ - ISerialization::DeserializeBinaryBulkStatePtr structure_state; - SerializationPtr variant_serialization; - ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; -}; - void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index dac4fbe88e0..cda82f31820 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -398,7 +398,8 @@ void SerializationMap::enumerateStreams( auto next_data = SubstreamData(nested) .withType(data.type ? assert_cast(*data.type).getNestedType() : nullptr) .withColumn(data.column ? assert_cast(*data.column).getNestedColumnPtr() : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(data.deserialize_prefix_state); nested->enumerateStreams(settings, callback, next_data); } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index bb7c19aa78d..6e4b4c4c533 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -549,26 +549,6 @@ bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & is return tryDeserializeText(column, rb, settings, true); } -void SerializationTuple::enumerateStreams( - EnumerateStreamsSettings & settings, - const StreamCallback & callback, - const SubstreamData & data) const -{ - const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; - const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; - const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; - - for (size_t i = 0; i < elems.size(); ++i) - { - auto next_data = SubstreamData(elems[i]) - .withType(type_tuple ? type_tuple->getElement(i) : nullptr) - .withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr) - .withSerializationInfo(info_tuple ? 
info_tuple->getElementInfo(i) : nullptr); - - elems[i]->enumerateStreams(settings, callback, next_data); - } -} - struct SerializeBinaryBulkStateTuple : public ISerialization::SerializeBinaryBulkState { std::vector states; @@ -579,6 +559,27 @@ struct DeserializeBinaryBulkStateTuple : public ISerialization::DeserializeBinar std::vector states; }; +void SerializationTuple::enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const +{ + const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; + const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; + const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; + const auto * tuple_deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + + for (size_t i = 0; i < elems.size(); ++i) + { + auto next_data = SubstreamData(elems[i]) + .withType(type_tuple ? type_tuple->getElement(i) : nullptr) + .withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr) + .withSerializationInfo(info_tuple ? info_tuple->getElementInfo(i) : nullptr) + .withDeserializePrefix(tuple_deserialize_prefix_state ? tuple_deserialize_prefix_state->states[i] : nullptr); + + elems[i]->enumerateStreams(settings, callback, next_data); + } +} void SerializationTuple::serializeBinaryBulkStatePrefix( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 3fe26b773e3..8e0ef112444 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -28,6 +28,16 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } +struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState +{ + std::vector states; +}; + +struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState +{ + std::vector states; +}; + void SerializationVariant::enumerateStreams( EnumerateStreamsSettings & settings, const StreamCallback & callback, @@ -35,6 +45,7 @@ void SerializationVariant::enumerateStreams( { const auto * type_variant = data.type ? &assert_cast(*data.type) : nullptr; const auto * column_variant = data.column ? &assert_cast(*data.column) : nullptr; + const auto * variant_deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; auto discriminators_serialization = std::make_shared(std::make_shared>(), "discr", SubstreamType::NamedVariantDiscriminators); auto local_discriminators = column_variant ? column_variant->getLocalDiscriminatorsPtr() : nullptr; @@ -59,7 +70,8 @@ void SerializationVariant::enumerateStreams( auto variant_data = SubstreamData(variants[i]) .withType(type_variant ? type_variant->getVariant(i) : nullptr) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(variant_deserialize_prefix_state ? 
variant_deserialize_prefix_state->states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -70,16 +82,6 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } -struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState -{ - std::vector states; -}; - -struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState -{ - std::vector states; -}; - void SerializationVariant::serializeBinaryBulkStatePrefix( const IColumn & column, SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 4f120ecac06..0e1ad81ce5b 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -12,34 +12,6 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -void SerializationVariantElement::enumerateStreams( - DB::ISerialization::EnumerateStreamsSettings & settings, - const DB::ISerialization::StreamCallback & callback, - const DB::ISerialization::SubstreamData & data) const -{ - /// We will need stream for discriminators during deserialization. - settings.path.push_back(Substream::VariantDiscriminators); - callback(settings.path); - settings.path.pop_back(); - - addVariantToPath(settings.path); - settings.path.back().data = data; - nested_serialization->enumerateStreams(settings, callback, data); - removeVariantFromPath(settings.path); -} - -void SerializationVariantElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const -{ - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElement"); -} - -void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const -{ - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement"); -} - struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { /// During deserialization discriminators and variant streams can be shared. @@ -56,6 +28,40 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; +void SerializationVariantElement::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData & data) const +{ + /// We will need stream for discriminators during deserialization. + settings.path.push_back(Substream::VariantDiscriminators); + callback(settings.path); + settings.path.pop_back(); + + const auto * deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + addVariantToPath(settings.path); + auto nested_data = SubstreamData(nested_serialization) + .withType(data.type ? removeNullableOrLowCardinalityNullable(data.type) : nullptr) + .withColumn(data.column ? removeNullableOrLowCardinalityNullable(data.column) : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(deserialize_prefix_state ? 
deserialize_prefix_state->variant_element_state : nullptr); + settings.path.back().data = nested_data; + nested_serialization->enumerateStreams(settings, callback, nested_data); + removeVariantFromPath(settings.path); +} + +void SerializationVariantElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElement"); +} + +void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement"); +} + void SerializationVariantElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { @@ -82,7 +88,6 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( { auto * variant_element_state = checkAndGetState(state); - size_t variant_limit = 0; /// First, deserialize discriminators from Variant column. settings.path.push_back(Substream::VariantDiscriminators); if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) @@ -99,30 +104,17 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); -// ColumnVariant::Discriminator discr; -// readBinaryLittleEndian(discr, *discriminators_stream); -// if (discr == ColumnVariant::NULL_DISCRIMINATOR) -// { SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); -// } -// else -// { -// auto & discriminators_data = assert_cast(*variant_element_state->discriminators->assumeMutable()).getData(); -// discriminators_data.resize_fill(discriminators_data.size() + limit, discr); -// } - addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } settings.path.pop_back(); + /// Iterate through new discriminators to calculate the limit for our variant. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - /// Iterate through new discriminators to calculate the limit for our variant. - if (!variant_limit) - { - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); - } + size_t variant_limit = 0; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); /// Now we know the limit for our variant and can deserialize it.
diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index c8bf12436b0..d18d5eec975 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -334,8 +334,7 @@ void MergeTreeReaderWide::prefetchForColumn( ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); - - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + auto callback = [&](const ISerialization::SubstreamPath & substream_path) { auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); @@ -348,7 +347,11 @@ void MergeTreeReaderWide::prefetchForColumn( prefetched_streams.insert(*stream_name); } } - }); + }; + + auto data = ISerialization::SubstreamData(serialization).withType(name_and_type.type).withDeserializePrefix(deserialize_binary_bulk_state_map[name_and_type.name]); + ISerialization::EnumerateStreamsSettings settings; + serialization->enumerateStreams(settings, callback, data); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index fb3e318687a..5e388d6a8ac 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -779,13 +779,7 @@ static NameToNameVector collectFilesForRenames( }; if (auto serialization = source_part->tryGetSerialization(command.column_name)) - { - auto name_and_type = source_part->getColumn(command.column_name); - ColumnPtr column_sample; - if (name_and_type.type->hasDynamicSubcolumns()) - column_sample = source_part->readColumnSample(name_and_type); - serialization->enumerateStreams(callback, name_and_type.type, column_sample); - } + serialization->enumerateStreams(callback); /// if we drop a column with statistic, we should also drop the stat file. if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) @@ -821,13 +815,7 @@ static NameToNameVector collectFilesForRenames( }; if (auto serialization = source_part->tryGetSerialization(command.column_name)) - { - auto name_and_type = source_part->getColumn(command.column_name); - ColumnPtr column_sample; - if (name_and_type.type->hasDynamicSubcolumns()) - column_sample = source_part->readColumnSample(name_and_type); - serialization->enumerateStreams(callback, name_and_type.type, column_sample); - } + serialization->enumerateStreams(callback); /// if we rename a column with statistic, we should also rename the stat file. 
if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.reference b/tests/queries/0_stateless/03034_dynamic_conversions.reference index af91add9ddd..45f94f7ecc4 100644 --- a/tests/queries/0_stateless/03034_dynamic_conversions.reference +++ b/tests/queries/0_stateless/03034_dynamic_conversions.reference @@ -61,3 +61,28 @@ str_5 String \N None 4 UInt64 1970-01-06 Date +0 +42 +42.42 +1 +0 +\N +42 +42.42 +1 +0 + +42 +42.42 +true +e10 +\N +42 +42.42 +true +e10 +\N +42 +\N +1 +\N diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.sql b/tests/queries/0_stateless/03034_dynamic_conversions.sql index e9b4944f5d8..ed75fbf2377 100644 --- a/tests/queries/0_stateless/03034_dynamic_conversions.sql +++ b/tests/queries/0_stateless/03034_dynamic_conversions.sql @@ -22,3 +22,13 @@ select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(numb select multiIf(number % 4 == 0, number, number % 4 == 1, toDate(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=4)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); +create table test (d Dynamic) engine = Memory; +insert into test values (NULL), (42), ('42.42'), (true), ('e10'); +select d::Float64 from test; +select d::Nullable(Float64) from test; +select d::String from test; +select d::Nullable(String) from test; +select d::UInt64 from test; -- {serverError CANNOT_PARSE_TEXT} +select d::Nullable(UInt64) from test; +select d::Date from test; -- {serverError CANNOT_PARSE_DATE} + diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.reference b/tests/queries/0_stateless/03037_dynamic_merges_1.reference index fff812f0396..0a647b41c4b 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1.reference @@ -1,5 +1,5 @@ MergeTree compact + horizontal merge -test1 +test 50000 DateTime 60000 Date 70000 Array(UInt16) @@ -20,8 +20,8 @@ test1 200000 Map(UInt64, UInt64) 260000 String 10000 Tuple(UInt64, UInt64) -100000 UInt64 100000 None +100000 UInt64 200000 Map(UInt64, UInt64) 260000 String 100000 None @@ -29,7 +29,7 @@ test1 200000 Map(UInt64, UInt64) 270000 String MergeTree wide + horizontal merge -test1 +test 50000 DateTime 60000 Date 70000 Array(UInt16) @@ -41,8 +41,8 @@ test1 100000 UInt64 190000 String 70000 Array(UInt16) -100000 UInt64 100000 None +100000 UInt64 190000 String 200000 Map(UInt64, UInt64) 100000 None @@ -50,8 +50,8 @@ test1 200000 Map(UInt64, UInt64) 260000 String 10000 Tuple(UInt64, UInt64) -100000 UInt64 100000 None +100000 UInt64 200000 Map(UInt64, UInt64) 260000 String 100000 None @@ -59,7 +59,7 @@ test1 200000 Map(UInt64, UInt64) 270000 String MergeTree compact + vertical merge -test1 +test 50000 DateTime 60000 Date 70000 Array(UInt16) @@ -71,8 +71,8 @@ test1 100000 UInt64 190000 String 70000 Array(UInt16) -100000 UInt64 100000 None +100000 UInt64 190000 String 200000 Map(UInt64, UInt64) 100000 None @@ -84,12 +84,12 @@ test1 100000 UInt64 200000 Map(UInt64, UInt64) 260000 String -100000 UInt64 100000 None +100000 UInt64 200000 Map(UInt64, UInt64) 270000 String MergeTree wide + vertical merge -test1 +test 50000 DateTime 60000 Date 70000 Array(UInt16) diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.sh b/tests/queries/0_stateless/03037_dynamic_merges_1.sh index cf524fb9393..056f6702727 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1.sh @@ 
-21,35 +21,36 @@ function test() $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "system stop merges test" $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "system stop merges test" $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" } $CH_CLIENT -q "drop table if exists test;" echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" test $CH_CLIENT -q "drop table test;" echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;" test $CH_CLIENT -q "drop table test;" + echo "MergeTree compact + vertical merge" $CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" test diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.reference b/tests/queries/0_stateless/03037_dynamic_merges_2.reference new file mode 100644 index 00000000000..420b8185b16 --- /dev/null +++ 
b/tests/queries/0_stateless/03037_dynamic_merges_2.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree wide + horizontal merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree compact + vertical merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree wide + vertical merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.sh b/tests/queries/0_stateless/03037_dynamic_merges_2.sh index e9d571c2104..40adbdd4262 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_2.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_2.sh @@ -19,7 +19,7 @@ function test() $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000)" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" } $CH_CLIENT -q "drop table if exists test;" diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.sh b/tests/queries/0_stateless/03040_dynamic_type_alters.sh.disabled similarity index 100% rename from tests/queries/0_stateless/03040_dynamic_type_alters.sh rename to tests/queries/0_stateless/03040_dynamic_type_alters.sh.disabled From 671650bd2eaf2a07d5e6f517b40905c71ce798b6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 28 Apr 2024 12:18:24 +0200 Subject: [PATCH 102/651] Cleanup --- src/Backups/BackupIO_AzureBlobStorage.cpp | 4 ++-- src/Storages/ObjectStorage/Azure/Configuration.h | 16 ++++++++++------ .../ObjectStorage/DataLakes/IStorageDataLake.h | 4 +--- src/Storages/ObjectStorage/HDFS/Configuration.h | 11 +++++++---- src/Storages/ObjectStorage/S3/Configuration.h | 10 ++++++---- .../ObjectStorage/StorageObjectStorage.cpp | 16 +++------------- .../ObjectStorage/StorageObjectStorage.h | 5 +---- .../ObjectStorage/StorageObjectStorageSource.cpp | 9 ++++----- .../ObjectStorage/StorageObjectStorageSource.h | 5 +---- .../registerStorageObjectStorage.cpp | 3 +-- src/Storages/S3Queue/StorageS3Queue.cpp | 6 ++---- 11 files changed, 38 insertions(+), 51 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index f00da686c18..3af66e5470f 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -36,7 +36,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = configuration.createClient(/* is_read_only */ false, /* attempt_to_create_container */true); + auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), configuration.createSettings(context_), @@ -121,7 +121,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = configuration.createClient(/* is_read_only */ false, 
attempt_to_create_container); + auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), configuration.createSettings(context_), diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 91a9a0bbbd5..1591cb42469 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -3,7 +3,6 @@ #include "config.h" #if USE_AZURE_BLOB_STORAGE - #include #include #include @@ -36,20 +35,25 @@ public: const Paths & getPaths() const override { return blobs_paths; } void setPaths(const Paths & paths) override { blobs_paths = paths; } - String getDataSourceDescription() override { return std::filesystem::path(connection_url) / container; } String getNamespace() const override { return container; } + String getDataSourceDescription() override { return std::filesystem::path(connection_url) / container; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; - ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT ConfigurationPtr clone() override { return std::make_shared(*this); } - void fromNamedCollection(const NamedCollection & collection) override; - void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) override; protected: + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + using AzureClient = Azure::Storage::Blobs::BlobContainerClient; using AzureClientPtr = std::unique_ptr; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 3119b844aaf..83865c47eb8 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -38,7 +38,7 @@ public: std::optional format_settings_, LoadingStrictnessLevel mode) { - auto object_storage = base_configuration->createObjectStorage(context); + auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; @@ -96,8 +96,6 @@ public: void updateConfiguration(ContextPtr local_context) override { - std::lock_guard lock(Storage::configuration_update_mutex); - Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index cac09ee1d92..dc06e754c44 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -28,19 +28,22 @@ public: const Paths & getPaths() const override { return paths; } void setPaths(const Paths & paths_) override { paths = paths_; } + std::string getPathWithoutGlobs() const override; String getNamespace() const override { return ""; } String 
getDataSourceDescription() override { return url; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; - ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT ConfigurationPtr clone() override { return std::make_shared(*this); } - void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; - std::string getPathWithoutGlobs() const override; + void addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) override; private: void fromNamedCollection(const NamedCollection &) override; diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 9eb724c4a64..b28b1c226a7 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -3,7 +3,6 @@ #include "config.h" #if USE_AWS_S3 - #include #include @@ -35,13 +34,16 @@ public: void check(ContextPtr context) const override; void validateNamespace(const String & name) const override; - ConfigurationPtr clone() override { return std::make_shared(*this); } bool isStaticConfiguration() const override { return static_configuration; } - ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + void addStructureAndFormatToArgs( - ASTs & args, const String & structure, const String & format, ContextPtr context) override; + ASTs & args, + const String & structure, + const String & format, + ContextPtr context) override; private: void fromNamedCollection(const NamedCollection & collection) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 2c9831f0d29..a187a8fc54d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -91,6 +91,7 @@ bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) c void StorageObjectStorage::updateConfiguration(ContextPtr context) { + /// FIXME: we should be able to update everything apart from client if static_configuration == true. 
if (!configuration->isStaticConfiguration()) object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); } @@ -113,7 +114,6 @@ public: const std::optional & format_settings_, bool distributed_processing_, ReadFromFormatInfo info_, - SchemaCache & schema_cache_, const bool need_only_count_, ContextPtr context_, size_t max_block_size_, @@ -121,11 +121,9 @@ public: : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) - , schema_cache(schema_cache_) , info(std::move(info_)) , virtual_columns(virtual_columns_) , format_settings(format_settings_) - , query_settings(configuration->getQuerySettings(context_)) , name(name_ + "Source") , need_only_count(need_only_count_) , max_block_size(max_block_size_) @@ -154,8 +152,8 @@ public: for (size_t i = 0; i < num_streams; ++i) { auto source = std::make_shared( - getName(), object_storage, configuration, info, format_settings, query_settings, - context, max_block_size, iterator_wrapper, need_only_count, schema_cache); + getName(), object_storage, configuration, info, format_settings, + context, max_block_size, iterator_wrapper, need_only_count); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); @@ -175,12 +173,10 @@ private: ObjectStoragePtr object_storage; ConfigurationPtr configuration; std::shared_ptr iterator_wrapper; - SchemaCache & schema_cache; const ReadFromFormatInfo info; const NamesAndTypesList virtual_columns; const std::optional format_settings; - const StorageObjectStorage::QuerySettings query_settings; const String name; const bool need_only_count; const size_t max_block_size; @@ -233,7 +229,6 @@ void StorageObjectStorage::read( format_settings, distributed_processing, read_from_format_info, - getSchemaCache(local_context), need_only_count, local_context, max_block_size, @@ -371,11 +366,6 @@ std::pair StorageObjectStorage::resolveSchemaAn return std::pair(columns, format); } -SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) -{ - return getSchemaCache(context, configuration->getTypeName()); -} - SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) { if (storage_type_name == "s3") diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 46d422b26c2..3f8ff79ad54 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -92,8 +92,6 @@ public: bool parallelizeOutputAfterReading(ContextPtr context) const override; - SchemaCache & getSchemaCache(const ContextPtr & context); - static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name); static ColumnsDescription resolveSchemaFromData( @@ -132,7 +130,6 @@ protected: const bool distributed_processing; LoggerPtr log; - std::mutex configuration_update_mutex; }; class StorageObjectStorage::Configuration @@ -175,7 +172,7 @@ public: virtual void check(ContextPtr context) const; virtual void validateNamespace(const String & /* name */) const {} - virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) = 0; virtual ConfigurationPtr clone() = 0; virtual bool isStaticConfiguration() const { return true; } diff --git 
a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index b224afb7a58..cb3f732ce83 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -44,19 +44,16 @@ StorageObjectStorageSource::StorageObjectStorageSource( ConfigurationPtr configuration_, const ReadFromFormatInfo & info, std::optional format_settings_, - const StorageObjectStorage::QuerySettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - bool need_only_count_, - SchemaCache & schema_cache_) + bool need_only_count_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) , object_storage(object_storage_) , configuration(configuration_) , format_settings(format_settings_) - , query_settings(query_settings_) , max_block_size(max_block_size_) , need_only_count(need_only_count_) , read_from_format_info(info) @@ -67,7 +64,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( 1/* max_threads */)) , columns_desc(info.columns_description) , file_iterator(file_iterator_) - , schema_cache(schema_cache_) + , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) { } @@ -229,6 +226,8 @@ std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const O StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) { ObjectInfoPtr object_info; + auto query_settings = configuration->getQuerySettings(getContext()); + do { object_info = file_iterator->next(processor); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 356478422bc..a8df00bc0ac 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -32,12 +32,10 @@ public: ConfigurationPtr configuration, const ReadFromFormatInfo & info, std::optional format_settings_, - const StorageObjectStorage::QuerySettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - bool need_only_count_, - SchemaCache & schema_cache_); + bool need_only_count_); ~StorageObjectStorageSource() override; @@ -62,7 +60,6 @@ protected: ObjectStoragePtr object_storage; const ConfigurationPtr configuration; const std::optional format_settings; - const StorageObjectStorage::QuerySettings query_settings; const UInt64 max_block_size; const bool need_only_count; const ReadFromFormatInfo read_from_format_info; diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index c23b180215e..74c8aeaad7d 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include @@ -59,7 +58,7 @@ static std::shared_ptr createStorageObjectStorage( return std::make_shared( configuration, - configuration->createObjectStorage(context), + configuration->createObjectStorage(context, /* is_readonly */false), args.getContext(), args.table_id, args.columns, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 38934a7895a..b9c67c7d801 100644 --- 
a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -138,7 +138,7 @@ StorageS3Queue::StorageS3Queue( checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); - object_storage = configuration->createObjectStorage(context_); + object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); FormatFactory::instance().checkFormatName(configuration->format); configuration->check(context_); @@ -361,12 +361,10 @@ std::shared_ptr StorageS3Queue::createSource( configuration, info, format_settings, - configuration->getQuerySettings(local_context), local_context, max_block_size, file_iterator, - false, - StorageObjectStorage::getSchemaCache(local_context, configuration->getTypeName())); + false); auto file_deleter = [=, this](const std::string & path) mutable { From 3e68103ac84e0c3d59759f5af5d615191a6a59e5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 26 Apr 2024 09:11:20 +0200 Subject: [PATCH 103/651] Fix interserver secret for Distributed over Distributed from remote() Right now, if you are executing remote() and the query later goes to a cluster with an interserver secret, then you should have the same user on the nodes of that cluster, otherwise the query will fail with: DB::NetException: Connection reset by peer And on the remote node: TCPHandler: User (initial, interserver mode): new_user (client: 172.16.1.5:40536) TCP_INTERSERVER-Session: d29ecf7d-2c1c-44d2-8cc9-4ab08175bf05 Authentication failed with error: new_user: Authentication failed: password is incorrect, or there is no user with such name. ServerErrorHandler: Code: 516. DB::Exception: new_user: Authentication failed: password is incorrect, or there is no user with such name. (AUTHENTICATION_FAILED), Stack trace (when copying this message, always include the lines below): The problem is that remote() does not use the user passed to it in any form; instead, the initial user is used, i.e. "cli_user" and not "query_user": chc --user cli_user -q "select * from remote(node, default, some_dist_table, 'query_user')" Fix this by using the user from the query for remote(). Note that Distributed over Distributed with tables still won't work; for this to work you have to have the same users on all nodes of all involved clusters when the interserver secret is enabled (see also the test).
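For illustration, a minimal sketch of the behaviour being fixed (node, table, and user names are the hypothetical ones from the example above):

    -- Executed by cli_user. Before this fix the interserver hop kept
    -- initial_user = 'cli_user', so the explicitly passed 'query_user' was
    -- effectively ignored and cli_user had to exist on the remote cluster;
    -- after this fix the hop authenticates as 'query_user' instead.
    SELECT * FROM remote(node, default, some_dist_table, 'query_user')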
Signed-off-by: Azat Khuzhin v2: move client initial_user adjustment into ClusterProxy/executeQuery.cpp v3: we cannot check for interserver_mode in updateSettingsAndClientInfoForCluster() since it is not yet interserver in remote() context --- .../ClusterProxy/executeQuery.cpp | 32 ++++++++++++-- src/Interpreters/ClusterProxy/executeQuery.h | 11 ++--- src/Storages/StorageDistributed.cpp | 3 +- .../configs/users.d/new_user.xml | 12 +++++ .../test.py | 44 ++++++++++++++++--- 5 files changed, 83 insertions(+), 19 deletions(-) create mode 100644 tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index f7727f70ff7..2af33421add 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -38,7 +38,8 @@ namespace ErrorCodes namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, +ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, + bool is_remote_function, ContextPtr context, const Settings & settings, const StorageID & main_table, @@ -46,9 +47,17 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, LoggerPtr log, const DistributedSettings * distributed_settings) { + ClientInfo new_client_info = context->getClientInfo(); Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); + /// In case of interserver mode we should reset initial_user for the remote() function to the user passed in the query. + if (is_remote_function) + { + const auto & address = cluster.getShardsAddresses().front().front(); + new_client_info.initial_user = address.user; + } + /// If "secret" (in remote_servers) is not in use, /// user on the shard is not the same as the user on the initiator, /// hence per-user limits should not be applied.
@@ -168,9 +177,23 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); + new_context->setClientInfo(new_client_info); return new_context; } +ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table) +{ + return updateSettingsAndClientInfoForCluster(cluster, + /* is_remote_function= */ false, + context, + settings, + main_table, + /* additional_filter_ast= */ {}, + /* log= */ {}, + /* distributed_settings= */ {}); +} + + static ThrottlerPtr getThrottler(const ContextPtr & context) { const Settings & settings = context->getSettingsRef(); @@ -209,7 +232,8 @@ void executeQuery( const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator) + AdditionalShardFilterGenerator shard_filter_generator, + bool is_remote_function) { const Settings & settings = context->getSettingsRef(); @@ -222,8 +246,8 @@ void executeQuery( SelectStreamFactory::Shards remote_shards; auto cluster = query_info.getCluster(); - auto new_context = updateSettingsForCluster(*cluster, context, settings, main_table, query_info.additional_filter_ast, log, - &distributed_settings); + auto new_context = updateSettingsAndClientInfoForCluster(*cluster, is_remote_function, context, + settings, main_table, query_info.additional_filter_ast, log, &distributed_settings); if (context->getSettingsRef().allow_experimental_parallel_reading_from_replicas && context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value != new_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value) diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 8f6f6300c7b..3734a237d19 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -38,13 +38,7 @@ class SelectStreamFactory; /// - optimize_skip_unused_shards_nesting /// /// @return new Context with adjusted settings -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, - ContextPtr context, - const Settings & settings, - const StorageID & main_table, - ASTPtr additional_filter_ast = nullptr, - LoggerPtr log = nullptr, - const DistributedSettings * distributed_settings = nullptr); +ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table); using AdditionalShardFilterGenerator = std::function; /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. 
@@ -63,7 +57,8 @@ void executeQuery( const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator); + AdditionalShardFilterGenerator shard_filter_generator, + bool is_remote_function); void executeQueryWithParallelReplicas( diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 69d3cf3ad3b..a4f51f1f587 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -927,7 +927,8 @@ void StorageDistributed::read( sharding_key_expr, sharding_key_column_name, distributed_settings, - additional_shard_filter_generator); + additional_shard_filter_generator, + /* is_remote_function= */ static_cast(owned_cluster)); /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. if (!query_plan.isInitialized()) diff --git a/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml b/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml new file mode 100644 index 00000000000..a747d61a0dd --- /dev/null +++ b/tests/integration/test_distributed_inter_server_secret/configs/users.d/new_user.xml @@ -0,0 +1,12 @@ + + + + + + ::/0 + + default + default + + + diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 10dbb23d961..50d7be4d11e 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -12,12 +12,16 @@ from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION cluster = ClickHouseCluster(__file__) -def make_instance(name, cfg, *args, **kwargs): +def make_instance(name, *args, **kwargs): + main_configs = kwargs.pop("main_configs", []) + main_configs.append("configs/remote_servers.xml") + user_configs = kwargs.pop("user_configs", []) + user_configs.append("configs/users.xml") return cluster.add_instance( name, with_zookeeper=True, - main_configs=["configs/remote_servers.xml", cfg], - user_configs=["configs/users.xml"], + main_configs=main_configs, + user_configs=user_configs, *args, **kwargs, ) @@ -27,11 +31,16 @@ def make_instance(name, cfg, *args, **kwargs): assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" # _n1/_n2 contains cluster with different -- should fail -n1 = make_instance("n1", "configs/remote_servers_n1.xml") -n2 = make_instance("n2", "configs/remote_servers_n2.xml") +# only n1 contains new_user +n1 = make_instance( + "n1", + main_configs=["configs/remote_servers_n1.xml"], + user_configs=["configs/users.d/new_user.xml"], +) +n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) backward = make_instance( "backward", - "configs/remote_servers_backward.xml", + main_configs=["configs/remote_servers_backward.xml"], image="clickhouse/clickhouse-server", # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, @@ -100,6 +109,12 @@ def bootstrap(): ) """ ) + n.query( + """ + CREATE TABLE dist_over_dist_secure AS data + Engine=Distributed(secure, currentDatabase(), dist_secure, key) + """ + ) @pytest.fixture(scope="module", autouse=True) @@ -432,3 +447,20 @@ def test_user_secure_cluster_from_backward(user, password): assert n1.contains_in_log( "Using deprecated interserver protocol because the client is too old. 
Consider upgrading all nodes in cluster." ) + + +def test_secure_cluster_distributed_over_distributed_different_users(): + # This works because we will have initial_user='default' + n1.query( + "SELECT * FROM remote('n1', currentDatabase(), dist_secure)", user="new_user" + ) + # While this is broken because now initial_user='new_user', and n2 does not have it + with pytest.raises(QueryRuntimeException): + n2.query( + "SELECT * FROM remote('n1', currentDatabase(), dist_secure, 'new_user')" + ) + # And this is still a problem, let's assume that this is OK, since we are + # expecting that in case of dist-over-dist the clusters are the same (users + # and stuff). + with pytest.raises(QueryRuntimeException): + n1.query("SELECT * FROM dist_over_dist_secure", user="new_user") From d076479097d32431b6ca56e8e5d2108d31b39787 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 29 Apr 2024 10:14:41 +0000 Subject: [PATCH 104/651] t --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 70 ++++++++++------------- src/Analyzer/TableFunctionNode.cpp | 6 ++ src/Analyzer/TableNode.cpp | 9 ++- 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index a3c60c76132..ea3c04a5d49 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3389,19 +3389,15 @@ private: const ContextPtr & context; }; -/// Compare resolved identifiers considering columns that become nullable after JOIN bool resolvedIdenfiersFromJoinAreEquals( const QueryTreeNodePtr & left_resolved_identifier, - const QueryTreeNodePtr & right_resolved_identifier, - const IdentifierResolveScope & scope) + const QueryTreeNodePtr & right_resolved_identifier) { - auto left_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(left_resolved_identifier, scope.join_columns_with_changed_types); - const auto & left_resolved_to_compare = left_original_node ? left_original_node : left_resolved_identifier; - - auto right_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(right_resolved_identifier, scope.join_columns_with_changed_types); - const auto & right_resolved_to_compare = right_original_node ?
right_original_node : right_resolved_identifier; - - return left_resolved_to_compare->isEqual(*right_resolved_to_compare, IQueryTreeNode::CompareOptions{.compare_aliases = false}); + return left_resolved_column_to_compare && right_resolved_column_to_compare + && left_resolved_column_to_compare->getColumnName() == right_resolved_column_to_compare->getColumnName(); } QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup, @@ -3544,42 +3540,34 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo resolved_identifier = std::move(result_column_node); } - else if (resolvedIdenfiersFromJoinAreEquals(left_resolved_identifier, right_resolved_identifier, scope)) + else if (resolvedIdenfiersFromJoinAreEquals(left_resolved_identifier, right_resolved_identifier)) { const auto & identifier_path_part = identifier_lookup.identifier.front(); - auto * left_resolved_identifier_column = left_resolved_identifier->as(); - auto * right_resolved_identifier_column = right_resolved_identifier->as(); + const auto & left_resolved_identifier_column = left_resolved_identifier->as(); + const auto & right_resolved_identifier_column = right_resolved_identifier->as(); - if (left_resolved_identifier_column && right_resolved_identifier_column) + const auto & left_column_source_alias = left_resolved_identifier_column.getColumnSource()->getAlias(); + const auto & right_column_source_alias = right_resolved_identifier_column.getColumnSource()->getAlias(); + + /** If column from right table was resolved using alias, we prefer column from right table. + * + * Example: SELECT dummy FROM system.one JOIN system.one AS A ON A.dummy = system.one.dummy; + * + * If alias is specified for left table, and alias is not specified for right table and identifier was resolved + * without using left table alias, we prefer column from right table. + * + * Example: SELECT dummy FROM system.one AS A JOIN system.one ON A.dummy = system.one.dummy; + * + * Otherwise we prefer column from left table. + */ + bool column_resolved_using_right_alias = identifier_path_part == right_column_source_alias; + bool column_resolved_without_using_left_alias = !left_column_source_alias.empty() + && right_column_source_alias.empty() + && identifier_path_part != left_column_source_alias; + if (column_resolved_using_right_alias || column_resolved_without_using_left_alias) { - const auto & left_column_source_alias = left_resolved_identifier_column->getColumnSource()->getAlias(); - const auto & right_column_source_alias = right_resolved_identifier_column->getColumnSource()->getAlias(); - - /** If column from right table was resolved using alias, we prefer column from right table. - * - * Example: SELECT dummy FROM system.one JOIN system.one AS A ON A.dummy = system.one.dummy; - * - * If alias is specified for left table, and alias is not specified for right table and identifier was resolved - * without using left table alias, we prefer column from right table. - * - * Example: SELECT dummy FROM system.one AS A JOIN system.one ON A.dummy = system.one.dummy; - * - * Otherwise we prefer column from left table. 
- */ - bool column_resolved_using_right_alias = identifier_path_part == right_column_source_alias; - bool column_resolved_without_using_left_alias = !left_column_source_alias.empty() - && right_column_source_alias.empty() - && identifier_path_part != left_column_source_alias; - if (column_resolved_using_right_alias || column_resolved_without_using_left_alias) - { - resolved_side = JoinTableSide::Right; - resolved_identifier = right_resolved_identifier; - } - else - { - resolved_side = JoinTableSide::Left; - resolved_identifier = left_resolved_identifier; - } + resolved_side = JoinTableSide::Right; + resolved_identifier = right_resolved_identifier; } else { diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp index 87d2fdcffb5..5b1711fd69f 100644 --- a/src/Analyzer/TableFunctionNode.cpp +++ b/src/Analyzer/TableFunctionNode.cpp @@ -94,6 +94,12 @@ bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) if (settings_changes != rhs_typed.settings_changes) return false; + /// For table functions, we should always compare aliases + /// because entries of table functions with the same name and arguments but with different aliases in a query + /// refer to different reads. + if (getAlias() != rhs_typed.getAlias()) + return false; + return table_expression_modifiers == rhs_typed.table_expression_modifiers; } diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp index 11d1a280a56..b95e85db35d 100644 --- a/src/Analyzer/TableNode.cpp +++ b/src/Analyzer/TableNode.cpp @@ -55,8 +55,13 @@ void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s bool TableNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); - return storage_id == rhs_typed.storage_id && table_expression_modifiers == rhs_typed.table_expression_modifiers && - temporary_table_name == rhs_typed.temporary_table_name && getAlias() == rhs_typed.getAlias(); + return storage_id == rhs_typed.storage_id + && table_expression_modifiers == rhs_typed.table_expression_modifiers + && temporary_table_name == rhs_typed.temporary_table_name + /// For tables, we should always compare aliases + /// because entries of tables with the same name but with different aliases in a query + /// refer to different reads. 
+ && getAlias() == rhs_typed.getAlias(); } void TableNode::updateTreeHashImpl(HashState & state, CompareOptions) const From 1ccae23170f7668b56a44cb3063e86530f32ce10 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 29 Apr 2024 17:05:31 +0000 Subject: [PATCH 105/651] Fix alter modify column for dynamic columns, make check part work for dynamic columns, fix style errors and tests --- src/Columns/ColumnDynamic.cpp | 5 --- src/Columns/ColumnDynamic.h | 7 +--- src/Core/SettingsChangesHistory.h | 2 + src/DataTypes/DataTypeVariant.cpp | 2 - src/DataTypes/IDataType.h | 6 +++ src/DataTypes/Serializations/ISerialization.h | 10 +++-- .../Serializations/SerializationArray.cpp | 2 +- .../Serializations/SerializationDynamic.cpp | 32 ++++++-------- .../SerializationDynamicElement.cpp | 12 +++--- .../Serializations/SerializationMap.cpp | 2 +- .../Serializations/SerializationTuple.cpp | 4 +- .../Serializations/SerializationVariant.cpp | 22 ++++++++-- .../Serializations/SerializationVariant.h | 8 ++++ .../SerializationVariantElement.cpp | 4 +- src/Functions/FunctionsConversion.cpp | 29 ++++--------- src/Functions/dynamicElement.cpp | 42 +++++++++++-------- src/Functions/variantElement.cpp | 4 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 ++- src/Storages/MergeTree/IMergeTreeDataPart.h | 4 +- .../MergeTreeDataPartWriterCompact.cpp | 28 +++++++++---- .../MergeTreeDataPartWriterCompact.h | 4 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 40 +++++++++++------- .../MergeTree/MergeTreeDataPartWriterWide.h | 4 +- .../MergeTree/MergeTreeReaderWide.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 21 +++++++++- src/Storages/MergeTree/checkDataPart.cpp | 2 +- ....disabled => 03040_dynamic_type_alters.sh} | 0 27 files changed, 180 insertions(+), 124 deletions(-) rename tests/queries/0_stateless/{03040_dynamic_type_alters.sh.disabled => 03040_dynamic_type_alters.sh} (100%) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 3074504973a..f3dff01af25 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -10,7 +10,6 @@ #include #include -#include namespace DB { @@ -687,7 +686,6 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & so } size_t size = source_statistics.data.empty() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : source_statistics.data.at(variant_name); - LOG_DEBUG(getLogger("ColumnDynamic"), "Source variant: {}. Variant: {}. Size: {}", source_variant_info.variant_name, variant_name, size); it->second += size; } } @@ -700,10 +698,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & so std::vector> variants_with_sizes; variants_with_sizes.reserve(all_variants.size()); for (const auto & variant : all_variants) - { - LOG_DEBUG(getLogger("ColumnDynamic"), "Variant: {}. Size: {}", variant->getName(), total_sizes[variant->getName()]); variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant); - } std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); /// Take first max_dynamic_types variants from sorted list. diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 7487a5aa0db..b5167f4b9d9 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -9,11 +9,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - /** * Column for storing Dynamic type values. * Dynamic column allows to insert and store values of any data types inside. 
@@ -340,7 +335,7 @@ private: /// Combine current variant with the other variant and return global discriminators mapping /// from other variant to the combined one. It's used for inserting from /// different variants. - /// Returns nullptr if maximum number of Variants is reached and tne new Variant cannot be created. + /// Returns nullptr if maximum number of Variants is reached and the new Variant cannot be created. std::vector * combineVariants(const VariantInfo & other_variant_info); void updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type); diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index d3b5de06e70..42cda26d73c 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.5", {{"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, + {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}}}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index b918b79a2ed..6478bd598f1 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -18,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int EMPTY_DATA_PASSED; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index dde61ca3a48..46c30240ef8 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -11,6 +11,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + class ReadBuffer; class WriteBuffer; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index ddbed34f614..b233230f9cc 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -138,9 +138,9 @@ public: return *this; } - SubstreamData & withDeserializePrefix(DeserializeBinaryBulkStatePtr deserialize_prefix_state_) + SubstreamData & withDeserializeState(DeserializeBinaryBulkStatePtr deserialize_state_) { - deserialize_prefix_state = std::move(deserialize_prefix_state_); + deserialize_state = std::move(deserialize_state_); return *this; } @@ -148,7 +148,11 @@ public: DataTypePtr type; ColumnPtr column; SerializationInfoPtr serialization_info; - DeserializeBinaryBulkStatePtr deserialize_prefix_state; + + /// For types with dynamic subcolumns deserialize state contains information + /// about current dynamic structure. And this information can be useful + /// when we call enumerateStreams to enumerate dynamic streams. 
+ DeserializeBinaryBulkStatePtr deserialize_state; }; struct Substream diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 6a8555a3714..ac7b8f4d084 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -255,7 +255,7 @@ void SerializationArray::enumerateStreams( .withType(type_array ? type_array->getNestedType() : nullptr) .withColumn(column_array ? column_array->getDataPtr() : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(data.deserialize_prefix_state); + .withDeserializeState(data.deserialize_state); nested->enumerateStreams(settings, callback, next_data); settings.path.pop_back(); diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 858445ed257..5e6106f560f 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -30,15 +31,9 @@ struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryB ISerialization::SerializeBinaryBulkStatePtr variant_state; /// Variants statistics. Map (Variant name) -> (Variant size). - ColumnDynamic::Statistics statistics = { .source =ColumnDynamic::Statistics::Source::READ }; + ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ }; SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} - - void updateStatistics(const ColumnVariant & column_variant) - { - for (size_t i = 0; i != variant_names.size(); ++i) - statistics.data[variant_names[i]] += column_variant.getVariantPtrByGlobalDiscriminator(i)->size(); - } }; struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState @@ -58,13 +53,13 @@ void SerializationDynamic::enumerateStreams( settings.path.pop_back(); const auto * column_dynamic = data.column ? &assert_cast(*data.column) : nullptr; - const auto * deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; - /// If column is nullptr and we didn't deserizlize prefix yet, nothing to enumerate as we don't have any variants. - if (!column_dynamic && !deserialize_prefix_state) + /// If column is nullptr and we don't have deserialize state yet, nothing to enumerate as we don't have any variants. + if (!column_dynamic && !deserialize_state) return; - const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_prefix_state->structure_state)->variant_type; + const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_state->structure_state)->variant_type; auto variant_serialization = variant_type->getDefaultSerialization(); settings.path.push_back(Substream::DynamicData); @@ -72,7 +67,7 @@ void SerializationDynamic::enumerateStreams( .withType(variant_type) .withColumn(column_dynamic ? column_dynamic->getVariantColumnPtr() : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(deserialize_prefix_state ? deserialize_prefix_state->variant_state : nullptr); + .withDeserializeState(deserialize_state ? 
deserialize_state->variant_state : nullptr); settings.path.back().data = variant_data; variant_serialization->enumerateStreams(settings, callback, variant_data); settings.path.pop_back(); @@ -124,11 +119,11 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( { size_t size = 0; /// Use statistics from column if it was created during merge. - if (statistics.data.empty() || statistics.source != ColumnDynamic::Statistics::Source::MERGE) - size = variant_column.getVariantByGlobalDiscriminator(i).size(); + if (!statistics.data.empty() && statistics.source == ColumnDynamic::Statistics::Source::MERGE) + size = statistics.data.at(variant_info.variant_names[i]); /// Otherwise we can use only variant sizes from current column. else - size = statistics.data.at(variant_info.variant_names[i]); + size = variant_column.getVariantByGlobalDiscriminator(i).size(); writeVarUInt(size, *stream); } } @@ -243,12 +238,9 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreams( if (!variant_info.variant_type->equals(*dynamic_state->variant_type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName()); - /// Update statistics. - if (offset == 0) - dynamic_state->updateStatistics(*variant_column); - settings.path.push_back(Substream::DynamicData); - dynamic_state->variant_serialization->serializeBinaryBulkWithMultipleStreams(*variant_column, offset, limit, settings, dynamic_state->variant_state); + assert_cast(*dynamic_state->variant_serialization) + .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(*variant_column, offset, limit, settings, dynamic_state->variant_state, dynamic_state->statistics.data); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 9be9802d926..059a7d57e4e 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -33,21 +33,21 @@ void SerializationDynamicElement::enumerateStreams( /// If we didn't deserialize prefix yet, we don't know if we actually have this variant in Dynamic column, /// so we cannot enumerate variant streams. - if (!data.deserialize_prefix_state) + if (!data.deserialize_state) return; - auto * deserialize_prefix_state = checkAndGetState(data.deserialize_prefix_state); + auto * deserialize_state = checkAndGetState(data.deserialize_state); /// If we don't have this variant, no need to enumerate streams for it as we won't read from any stream. 
- if (!deserialize_prefix_state->variant_serialization) + if (!deserialize_state->variant_serialization) return; settings.path.push_back(Substream::DynamicData); - auto variant_data = SubstreamData(deserialize_prefix_state->variant_serialization) + auto variant_data = SubstreamData(deserialize_state->variant_serialization) .withType(data.type) .withColumn(data.column) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(deserialize_prefix_state->variant_element_state); - deserialize_prefix_state->variant_serialization->enumerateStreams(settings, callback, variant_data); + .withDeserializeState(deserialize_state->variant_element_state); + deserialize_state->variant_serialization->enumerateStreams(settings, callback, variant_data); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index cda82f31820..10635fb9142 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -399,7 +399,7 @@ void SerializationMap::enumerateStreams( .withType(data.type ? assert_cast(*data.type).getNestedType() : nullptr) .withColumn(data.column ? assert_cast(*data.column).getNestedColumnPtr() : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(data.deserialize_prefix_state); + .withDeserializeState(data.deserialize_state); nested->enumerateStreams(settings, callback, next_data); } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 6e4b4c4c533..ef0a75fac40 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -567,7 +567,7 @@ void SerializationTuple::enumerateStreams( const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; - const auto * tuple_deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + const auto * tuple_deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; for (size_t i = 0; i < elems.size(); ++i) { @@ -575,7 +575,7 @@ void SerializationTuple::enumerateStreams( .withType(type_tuple ? type_tuple->getElement(i) : nullptr) .withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr) .withSerializationInfo(info_tuple ? info_tuple->getElementInfo(i) : nullptr) - .withDeserializePrefix(tuple_deserialize_prefix_state ? tuple_deserialize_prefix_state->states[i] : nullptr); + .withDeserializeState(tuple_deserialize_state ? tuple_deserialize_state->states[i] : nullptr); elems[i]->enumerateStreams(settings, callback, next_data); } diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 8e0ef112444..9456ffa3ad3 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -45,7 +45,7 @@ void SerializationVariant::enumerateStreams( { const auto * type_variant = data.type ? &assert_cast(*data.type) : nullptr; const auto * column_variant = data.column ? &assert_cast(*data.column) : nullptr; - const auto * variant_deserialize_prefix_state = data.deserialize_prefix_state ? 
checkAndGetState(data.deserialize_prefix_state) : nullptr; + const auto * variant_deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; auto discriminators_serialization = std::make_shared(std::make_shared>(), "discr", SubstreamType::NamedVariantDiscriminators); auto local_discriminators = column_variant ? column_variant->getLocalDiscriminatorsPtr() : nullptr; @@ -71,7 +71,7 @@ void SerializationVariant::enumerateStreams( .withType(type_variant ? type_variant->getVariant(i) : nullptr) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(variant_deserialize_prefix_state ? variant_deserialize_prefix_state->states[i] : nullptr); + .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -144,12 +144,13 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( } -void SerializationVariant::serializeBinaryBulkWithMultipleStreams( +void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( const IColumn & column, size_t offset, size_t limit, SerializeBinaryBulkSettings & settings, - SerializeBinaryBulkStatePtr & state) const + SerializeBinaryBulkStatePtr & state, + std::unordered_map & variants_statistics) const { const ColumnVariant & col = assert_cast(column); if (const size_t size = col.size(); limit == 0 || offset + limit > size) @@ -188,6 +189,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( { addVariantElementToPath(settings.path, i); variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]); + variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size(); settings.path.pop_back(); } settings.path.pop_back(); @@ -208,6 +210,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( addVariantElementToPath(settings.path, non_empty_global_discr); /// We can use the same offset/limit as for whole Variant column variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); + variants_statistics[variant_names[non_empty_global_discr]] += limit; settings.path.pop_back(); settings.path.pop_back(); return; @@ -247,12 +250,23 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( variant_offsets_and_limits[i].second, settings, variant_state->states[i]); + variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second; settings.path.pop_back(); } } settings.path.pop_back(); } +void SerializationVariant::serializeBinaryBulkWithMultipleStreams( + const DB::IColumn & column, + size_t offset, + size_t limit, + DB::ISerialization::SerializeBinaryBulkSettings & settings, + DB::ISerialization::SerializeBinaryBulkStatePtr & state) const +{ + std::unordered_map tmp_statistics; + serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(column, offset, limit, settings, state, tmp_statistics); +} void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index 0de786f5561..b6aa1534538 100644 --- 
a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -69,6 +69,14 @@ public: SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override; + void serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state, + std::unordered_map & variants_statistics) const; + void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 0e1ad81ce5b..dc7fc3b9b35 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -38,13 +38,13 @@ void SerializationVariantElement::enumerateStreams( callback(settings.path); settings.path.pop_back(); - const auto * deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; addVariantToPath(settings.path); auto nested_data = SubstreamData(nested_serialization) .withType(data.type ? removeNullableOrLowCardinalityNullable(data.type) : nullptr) .withColumn(data.column ? removeNullableOrLowCardinalityNullable(data.column) : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(deserialize_prefix_state ? deserialize_prefix_state->variant_element_state : nullptr); + .withDeserializeState(deserialize_state ? deserialize_state->variant_element_state : nullptr); settings.path.back().data = data; nested_serialization->enumerateStreams(settings, callback, data); removeVariantFromPath(settings.path); diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 9a8ed03a81d..b01643a9532 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -66,8 +66,6 @@ #include #include -#include - namespace DB { @@ -4050,9 +4048,9 @@ private: casted_variant_columns.reserve(variant_types.size()); for (size_t i = 0; i != variant_types.size(); ++i) { - auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(i); + auto variant_col = column_variant.getVariantPtrByGlobalDiscriminator(i); ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }}; - const auto & variant_wrapper = variant_wrappers[column_variant.globalDiscriminatorByLocal(i)]; + const auto & variant_wrapper = variant_wrappers[i]; casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); } @@ -4062,11 +4060,11 @@ private: res->reserve(input_rows_count); for (size_t i = 0; i != input_rows_count; ++i) { - auto local_discr = local_discriminators[i]; - if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + auto global_discr = column_variant.globalDiscriminatorByLocal(local_discriminators[i]); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) res->insertDefault(); else - res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i)); + res->insertFrom(*casted_variant_columns[global_discr], column_variant.offsetAt(i)); } return res; @@ -4236,14 +4234,14 @@ private: return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); } - WrapperType createDynamicToColumnWrapper(const DataTypePtr & 
to_type) const + WrapperType createDynamicToColumnWrapper(const DataTypePtr &) const { - return [this, to_type] + return [this] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr { const auto & column_dynamic = assert_cast(*arguments.front().column.get()); const auto & variant_info = column_dynamic.getVariantInfo(); - auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), to_type); + auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), result_type); ColumnsWithTypeAndName args = {ColumnWithTypeAndName(column_dynamic.getVariantColumnPtr(), variant_info.variant_type, "")}; return variant_wrapper(args, result_type, col_nullable, input_rows_count); }; @@ -4279,8 +4277,6 @@ private: size_t max_result_num_variants, const ColumnDynamic::Statistics & statistics = {}) const { - LOG_DEBUG(getLogger("FunctionsConversion"), "getReducedVariant for variant {} with size {}", variant_type->getName(), variant_column.size()); - const auto & variant_types = assert_cast(*variant_type).getVariants(); /// First check if we don't exceed the limit in current Variant column. if (variant_types.size() < max_result_num_variants || (variant_types.size() == max_result_num_variants && variant_name_to_discriminator.contains("String"))) @@ -4296,12 +4292,11 @@ private: { /// String variant won't be removed. String variant_name = variant_types[i]->getName(); - LOG_DEBUG(getLogger("FunctionsConversion"), "Variant {}/{} size: {}, statistics: {}", variant_name, i, variant_column.getVariantByGlobalDiscriminator(i).size(), statistics.data.contains(variant_name) ? toString(statistics.data.at(variant_name)) : "none"); if (variant_name == "String") { old_string_discriminator = i; - /// For simplicity, add this variant to the list that will be converted string, + /// For simplicity, add this variant to the list that will be converted to string, /// so we will process it with other variants when constructing the new String variant. variants_to_convert_to_string.push_back(i); } @@ -4361,11 +4356,9 @@ private: { auto string_type = std::make_shared(); auto string_wrapper = prepareUnpackDictionaries(variant_types[discr], string_type); - LOG_DEBUG(getLogger("FunctionsConversion"), "Convert variant {} with size {} to String", variant_types[discr]->getName(), variant_column.getVariantPtrByGlobalDiscriminator(discr)->size()); auto column_to_convert = ColumnWithTypeAndName(variant_column.getVariantPtrByGlobalDiscriminator(discr), variant_types[discr], ""); ColumnsWithTypeAndName args = {column_to_convert}; auto variant_string_column = string_wrapper(args, string_type, nullptr, column_to_convert.column->size()); - LOG_DEBUG(getLogger("FunctionsConversion"), "Got String column with size {}", variant_string_column->size()); string_variant_size += variant_string_column->size(); variants_converted_to_string[discr] = variant_string_column; } @@ -4381,11 +4374,9 @@ private: new_offsets_data.reserve(variant_column.size()); const auto & old_local_discriminators = variant_column.getLocalDiscriminators(); const auto & old_offsets = variant_column.getOffsets(); - LOG_DEBUG(getLogger("FunctionsConversion"), "Discriminators size: {}. 
Offsets size: {}", old_local_discriminators.size(), old_offsets.size()); for (size_t i = 0; i != old_local_discriminators.size(); ++i) { auto old_discr = variant_column.globalDiscriminatorByLocal(old_local_discriminators[i]); - LOG_DEBUG(getLogger("FunctionsConversion"), "Row {}, discriminator {}", i, UInt64(old_discr)); if (old_discr == ColumnVariant::NULL_DISCRIMINATOR) { @@ -4398,12 +4389,10 @@ private: new_discriminators_data.push_back(new_discr); if (new_discr != string_variant_discriminator) { - LOG_DEBUG(getLogger("FunctionsConversion"), "Keep variant {}", UInt64(old_discr)); new_offsets_data.push_back(old_offsets[i]); } else { - LOG_DEBUG(getLogger("FunctionsConversion"), "Get string value of variant {} with String column with size {} at offset {}", UInt64(old_discr), variants_converted_to_string[old_discr]->size(), old_offsets[i]); new_offsets_data.push_back(string_variant->size()); string_variant->insertFrom(*variants_converted_to_string[old_discr], old_offsets[i]); } diff --git a/src/Functions/dynamicElement.cpp b/src/Functions/dynamicElement.cpp index 964c058776e..6752a61b6c3 100644 --- a/src/Functions/dynamicElement.cpp +++ b/src/Functions/dynamicElement.cpp @@ -149,24 +149,30 @@ private: REGISTER_FUNCTION(DynamicElement) { -// factory.registerFunction(FunctionDocumentation{ -// .description = R"( -//Extracts a column with specified type from a `Dynamic` column. -//)", -// .syntax{"dynamicElement(dynamic, type_name)"}, -// .arguments{{ -// {"dynamic", "Dynamic column"}, -// {"type_name", "The name of the variant type to extract"}}}, -// .examples{{{ -// "Example", -// R"( -//)", -// R"( -//)"}}}, -// .categories{"Dynamic"}, -// }); - - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Extracts a column with specified type from a `Dynamic` column. +)", + .syntax{"dynamicElement(dynamic, type_name)"}, + .arguments{ + {"dynamic", "Dynamic column"}, + {"type_name", "The name of the variant type to extract"}}, + .examples{{{ + "Example", + R"( +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;)", + R"( +┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │ +└───────────────┴────────────────┴─────────────────────────────┴────────────────────────────┴───────────────────────────────────┴───────────────────────────┴────────────────────────────────────┘ +)"}}}, + .categories{"Dynamic"}, + }); } } diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp index b57ccb6fee1..e63afc68b34 100644 --- a/src/Functions/variantElement.cpp +++ b/src/Functions/variantElement.cpp @@ -171,10 +171,10 @@ REGISTER_FUNCTION(VariantElement) Extracts a column with specified type from a `Variant` column. 
)", .syntax{"variantElement(variant, type_name, [, default_value])"}, - .arguments{{ + .arguments{ {"variant", "Variant column"}, {"type_name", "The name of the variant type to extract"}, - {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}}, + {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}, .examples{{{ "Example", R"( diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 9107c67afdd..9ef5b58ff91 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2392,12 +2392,14 @@ void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const exception_code = code; } -ColumnPtr IMergeTreeDataPart::readColumnSample(const NameAndTypePair & column) const +ColumnPtr IMergeTreeDataPart::getColumnSample(const NameAndTypePair & column) const { const size_t total_mark = getMarksCount(); - if (!total_mark) + /// If column doesn't have dynamic subcolumns or part has no data, just create column using it's type. + if (!column.type->hasDynamicSubcolumns() || !total_mark) return column.type->createColumn(); + /// Otherwise, read sample column with 0 rows from the part, so it will load dynamic structure. NamesAndTypesList cols; cols.emplace_back(column); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 78619f216c0..ddfc66cc622 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -166,7 +166,9 @@ public: NameAndTypePair getColumn(const String & name) const; std::optional tryGetColumn(const String & column_name) const; - ColumnPtr readColumnSample(const NameAndTypePair & column) const; + /// Get sample column from part. For ordinary columns it just creates column using it's type. + /// For columns with dynamic structure it reads sample column with 0 rows from the part. 
+ ColumnPtr getColumnSample(const NameAndTypePair & column) const; const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index d0a685d95fc..e34822ce6df 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -44,18 +44,29 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } -} - -void MergeTreeDataPartWriterCompact::initStreamsIfNeeded(const Block & block) -{ - if (!compressed_streams.empty()) - return; auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, block.getByName(column.name).column, compression); + addStreams(column, nullptr, compression); + } +} + +void MergeTreeDataPartWriterCompact::initDynamicStreamsIfNeeded(const Block & block) +{ + if (is_dynamic_streams_initialized) + return; + + is_dynamic_streams_initialized = true; + auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block.getByName(column.name).column, compression); + } } } @@ -155,7 +166,8 @@ void writeColumnSingleGranule( void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::Permutation * permutation) { - initStreamsIfNeeded(block); + /// On first block of data initialize streams for dynamic subcolumns. + initDynamicStreamsIfNeeded(block); /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 1c748803c52..f35479387f6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -44,7 +44,7 @@ private: void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc); - void initStreamsIfNeeded(const Block & block); + void initDynamicStreamsIfNeeded(const Block & block); Block header; @@ -98,6 +98,8 @@ private: /// then finally to 'marks_file'. 
std::unique_ptr marks_compressor; std::unique_ptr marks_source_hashing; + + bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index c23a9a81cbc..fb7ee9f7fe8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -89,19 +89,29 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { -} - -void MergeTreeDataPartWriterWide::initStreamsIfNeeded(const DB::Block & block) -{ - if (!column_streams.empty()) - return; - - block_sample = block.cloneEmpty(); auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, block_sample.getByName(column.name).column, compression); + addStreams(column, nullptr, compression); + } +} + +void MergeTreeDataPartWriterWide::initDynamicStreamsIfNeeded(const DB::Block & block) +{ + if (is_dynamic_streams_initialized) + return; + + is_dynamic_streams_initialized = true; + block_sample = block.cloneEmpty(); + auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block_sample.getByName(column.name).column, compression); + } } } @@ -123,6 +133,10 @@ void MergeTreeDataPartWriterWide::addStreams( else stream_name = full_stream_name; + /// Shared offsets for Nested type. + if (column_streams.contains(stream_name)) + return; + auto it = stream_name_to_full_name.find(stream_name); if (it != stream_name_to_full_name.end() && it->second != full_stream_name) throw Exception(ErrorCodes::INCORRECT_FILE_NAME, @@ -130,10 +144,6 @@ void MergeTreeDataPartWriterWide::addStreams( " It is a collision between a filename for one column and a hash of filename for another column or a bug", stream_name, it->second, full_stream_name); - /// Shared offsets for Nested type. - if (column_streams.contains(stream_name)) - return; - const auto & subtype = substream_path.back().data.type; CompressionCodecPtr compression_codec; @@ -231,7 +241,8 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Permutation * permutation) { - initStreamsIfNeeded(block); + /// On first block of data initialize streams for dynamic subcolumns. 
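+    /// (Streams for ordinary columns are created in the constructor; the structure
+    /// of dynamic subcolumns is only known from actual data, so their streams cannot
+    /// be created before the first block arrives.)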
+ initDynamicStreamsIfNeeded(block); /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, @@ -604,7 +615,6 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai " index granularity size {}, last rows {}", column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); } - } void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index ebdd907914f..8343144f2e1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -87,7 +87,7 @@ private: const ColumnPtr & column, const ASTPtr & effective_codec_desc); - void initStreamsIfNeeded(const Block & block); + void initDynamicStreamsIfNeeded(const Block & block); /// Method for self check (used in debug-build only). Checks that written /// data and corresponding marks are consistent. Otherwise throws logical @@ -135,6 +135,8 @@ private: size_t rows_written_in_last_mark = 0; Block block_sample; + + bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index d18d5eec975..64ca6132cc4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -349,7 +349,7 @@ void MergeTreeReaderWide::prefetchForColumn( } }; - auto data = ISerialization::SubstreamData(serialization).withType(name_and_type.type).withDeserializePrefix(deserialize_binary_bulk_state_map[name_and_type.name]); + auto data = ISerialization::SubstreamData(serialization).withType(name_and_type.type).withDeserializeState(deserialize_binary_bulk_state_map[name_and_type.name]); ISerialization::EnumerateStreamsSettings settings; serialization->enumerateStreams(settings, callback, data); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5e388d6a8ac..2bbc5bdb3ae 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -60,6 +60,21 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } +static bool haveMutationsOfDynamicColumns(const MergeTreeData::DataPartPtr & data_part, const MutationCommands & commands) +{ + for (const auto & command : commands) + { + if (!command.column_name.empty()) + { + auto column = data_part->tryGetColumn(command.column_name); + if (column && column->type->hasDynamicSubcolumns()) + return true; + } + } + + return false; +} + static UInt64 getExistingRowsCount(const Block & block) { auto column = block.getByName(RowExistsColumn::name).column; @@ -95,7 +110,7 @@ static void splitAndModifyMutationCommands( auto part_columns = part->getColumnsDescription(); const auto & table_columns = metadata_snapshot->getColumns(); - if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) + if (haveMutationsOfDynamicColumns(part, commands) || !isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { NameSet mutated_columns; NameSet dropped_columns; @@ -2250,7 +2265,9 @@ bool MutateTask::prepare() /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data - if (!isWidePart(ctx->source_part) || 
!isFullPartStorage(ctx->source_part->getDataPartStorage()) + /// Also currently mutations of types with dynamic subcolumns in Wide part are possible only by + /// rewriting the whole part. + if (MutationHelpers::haveMutationsOfDynamicColumns(ctx->source_part, ctx->commands_for_part) || !isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) || (ctx->interpreter && ctx->interpreter->isAffectingAllColumns())) { /// In case of replicated merge tree with zero copy replication diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index b4d32e71d0d..fc06bcac823 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -219,7 +219,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( auto file_name = *stream_name + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - }); + }, column.type, data_part->getColumnSample(column)); } } else diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.sh.disabled b/tests/queries/0_stateless/03040_dynamic_type_alters.sh similarity index 100% rename from tests/queries/0_stateless/03040_dynamic_type_alters.sh.disabled rename to tests/queries/0_stateless/03040_dynamic_type_alters.sh From 5f75e9847aa88921fb910a0b1857cb8f57d05c62 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 30 Apr 2024 10:52:59 +0000 Subject: [PATCH 106/651] revert --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 70 +++++++++++++---------- src/Analyzer/TableFunctionNode.cpp | 6 -- src/Analyzer/TableNode.cpp | 9 +-- 3 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index ea3c04a5d49..a3c60c76132 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3389,15 +3389,19 @@ private: const ContextPtr & context; }; +/// Compare resolved identifiers considering columns that become nullable after JOIN bool resolvedIdenfiersFromJoinAreEquals( const QueryTreeNodePtr & left_resolved_identifier, - const QueryTreeNodePtr & right_resolved_identifier) + const QueryTreeNodePtr & right_resolved_identifier, + const IdentifierResolveScope & scope) { - auto * left_resolved_column_to_compare = left_resolved_identifier->as(); - auto * right_resolved_column_to_compare = right_resolved_identifier->as(); + auto left_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(left_resolved_identifier, scope.join_columns_with_changed_types); + const auto & left_resolved_to_compare = left_original_node ? left_original_node : left_resolved_identifier; - return left_resolved_column_to_compare && right_resolved_column_to_compare - && left_resolved_column_to_compare->getColumnName() == right_resolved_column_to_compare->getColumnName(); + auto right_original_node = ReplaceColumnsVisitor::findTransitiveReplacement(right_resolved_identifier, scope.join_columns_with_changed_types); + const auto & right_resolved_to_compare = right_original_node ? 
right_original_node : right_resolved_identifier; + + return left_resolved_to_compare->isEqual(*right_resolved_to_compare, IQueryTreeNode::CompareOptions{.compare_aliases = false}); } QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup, @@ -3540,34 +3544,42 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo resolved_identifier = std::move(result_column_node); } - else if (resolvedIdenfiersFromJoinAreEquals(left_resolved_identifier, right_resolved_identifier)) + else if (resolvedIdenfiersFromJoinAreEquals(left_resolved_identifier, right_resolved_identifier, scope)) { const auto & identifier_path_part = identifier_lookup.identifier.front(); - const auto & left_resolved_identifier_column = left_resolved_identifier->as(); - const auto & right_resolved_identifier_column = right_resolved_identifier->as(); + auto * left_resolved_identifier_column = left_resolved_identifier->as(); + auto * right_resolved_identifier_column = right_resolved_identifier->as(); - const auto & left_column_source_alias = left_resolved_identifier_column.getColumnSource()->getAlias(); - const auto & right_column_source_alias = right_resolved_identifier_column.getColumnSource()->getAlias(); - - /** If column from right table was resolved using alias, we prefer column from right table. - * - * Example: SELECT dummy FROM system.one JOIN system.one AS A ON A.dummy = system.one.dummy; - * - * If alias is specified for left table, and alias is not specified for right table and identifier was resolved - * without using left table alias, we prefer column from right table. - * - * Example: SELECT dummy FROM system.one AS A JOIN system.one ON A.dummy = system.one.dummy; - * - * Otherwise we prefer column from left table. - */ - bool column_resolved_using_right_alias = identifier_path_part == right_column_source_alias; - bool column_resolved_without_using_left_alias = !left_column_source_alias.empty() - && right_column_source_alias.empty() - && identifier_path_part != left_column_source_alias; - if (column_resolved_using_right_alias || column_resolved_without_using_left_alias) + if (left_resolved_identifier_column && right_resolved_identifier_column) { - resolved_side = JoinTableSide::Right; - resolved_identifier = right_resolved_identifier; + const auto & left_column_source_alias = left_resolved_identifier_column->getColumnSource()->getAlias(); + const auto & right_column_source_alias = right_resolved_identifier_column->getColumnSource()->getAlias(); + + /** If column from right table was resolved using alias, we prefer column from right table. + * + * Example: SELECT dummy FROM system.one JOIN system.one AS A ON A.dummy = system.one.dummy; + * + * If alias is specified for left table, and alias is not specified for right table and identifier was resolved + * without using left table alias, we prefer column from right table. + * + * Example: SELECT dummy FROM system.one AS A JOIN system.one ON A.dummy = system.one.dummy; + * + * Otherwise we prefer column from left table. 
+ */ + bool column_resolved_using_right_alias = identifier_path_part == right_column_source_alias; + bool column_resolved_without_using_left_alias = !left_column_source_alias.empty() + && right_column_source_alias.empty() + && identifier_path_part != left_column_source_alias; + if (column_resolved_using_right_alias || column_resolved_without_using_left_alias) + { + resolved_side = JoinTableSide::Right; + resolved_identifier = right_resolved_identifier; + } + else + { + resolved_side = JoinTableSide::Left; + resolved_identifier = left_resolved_identifier; + } } else { diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp index 5b1711fd69f..87d2fdcffb5 100644 --- a/src/Analyzer/TableFunctionNode.cpp +++ b/src/Analyzer/TableFunctionNode.cpp @@ -94,12 +94,6 @@ bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) if (settings_changes != rhs_typed.settings_changes) return false; - /// For table functions, we should always compare aliases - /// because entries of table functions with the same name and arguments but with different aliases in a query - /// refer to different reads. - if (getAlias() != rhs_typed.getAlias()) - return false; - return table_expression_modifiers == rhs_typed.table_expression_modifiers; } diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp index b95e85db35d..daf5db08551 100644 --- a/src/Analyzer/TableNode.cpp +++ b/src/Analyzer/TableNode.cpp @@ -55,13 +55,8 @@ void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s bool TableNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); - return storage_id == rhs_typed.storage_id - && table_expression_modifiers == rhs_typed.table_expression_modifiers - && temporary_table_name == rhs_typed.temporary_table_name - /// For tables, we should always compare aliases - /// because entries of tables with the same name but with different aliases in a query - /// refer to different reads. 
-        && getAlias() == rhs_typed.getAlias();
+    return storage_id == rhs_typed.storage_id && table_expression_modifiers == rhs_typed.table_expression_modifiers &&
+        temporary_table_name == rhs_typed.temporary_table_name;
 }
 
 void TableNode::updateTreeHashImpl(HashState & state, CompareOptions) const

From e7f66384f1bbed3c1ebe1417b07a05f4e2dfbfe4 Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Tue, 30 Apr 2024 20:11:41 +0200
Subject: [PATCH 107/651] fix intersection when restart after drop range

---
 src/Storages/MergeTree/MergeTreeData.cpp     | 16 ++++-
 .../test_intersecting_parts/__init__.py      |  0
 .../test_intersecting_parts/test.py          | 70 +++++++++++++++++++
 3 files changed, 85 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration/test_intersecting_parts/__init__.py
 create mode 100644 tests/integration/test_intersecting_parts/test.py

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index fcab606130d..bbf89a87282 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1656,6 +1656,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional parts_to_load_by_disk(disks.size());
+    std::vector unexpected_parts_to_load_by_disk(disks.size());
 
     ThreadPoolCallbackRunnerLocal runner(getActivePartsLoadingThreadPool().get(), "ActiveParts");
 
@@ -1666,6 +1667,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), format_version))
-                    disk_parts.emplace_back(*part_info, it->name(), disk_ptr);
+                {
+                    if (expected_parts && !expected_parts->contains(it->name()))
+                        unexpected_disk_parts.emplace_back(*part_info, it->name(), disk_ptr);
+                    else
+                        disk_parts.emplace_back(*part_info, it->name(), disk_ptr);
+                }
             }
         }, Priority{0});
     }
@@ -1688,6 +1695,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional(info, name, disk));
+
     loading_tree.traverse(/*recursive=*/ true, [&](const auto & node)
     {
         if (!node->is_loaded)

diff --git a/tests/integration/test_intersecting_parts/__init__.py b/tests/integration/test_intersecting_parts/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d

diff --git a/tests/integration/test_intersecting_parts/test.py b/tests/integration/test_intersecting_parts/test.py
new file mode 100644
index 00000000000..18485e68578
--- /dev/null
+++ b/tests/integration/test_intersecting_parts/test.py
@@ -0,0 +1,70 @@
+import pytest
+import logging
+
+from helpers.cluster import ClickHouseCluster
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance("node", with_zookeeper=True)
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+# This test constructs intersecting parts intentionally. It's not an elegant test.
+# TODO(hanfei): write a test which selects part 1_1 merging with part 2_2 and a drop range. 
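+# Sketch of the scenario built below: four inserts create parts all_0_0_0 .. all_3_3_0,
+# two of them are dropped and the rest are merged into all_0_3_1. Then an empty part
+# all_1_2_3 is forged on disk: its block range [1, 2] lies inside [0, 3], but its level
+# (3) is higher, so all_0_3_1 does not cover it and the two parts intersect, imitating a
+# leftover drop range. After re-attach the server must load it as an unexpected part
+# instead of failing, and the data (sum == 5) must survive.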
+def test_intersect_parts_when_restart(started_cluster):
+    node.query(
+        """
+        CREATE TABLE data (
+            key Int
+        )
+        ENGINE = ReplicatedMergeTree('/ch/tables/default/data', 'node')
+        ORDER BY key;
+        """
+    )
+    node.query("system stop cleanup data")
+    node.query("INSERT INTO data values (1)")
+    node.query("INSERT INTO data values (2)")
+    node.query("INSERT INTO data values (3)")
+    node.query("INSERT INTO data values (4)")
+    node.query("ALTER TABLE data DROP PART 'all_1_1_0'")
+    node.query("ALTER TABLE data DROP PART 'all_2_2_0'")
+    node.query("OPTIMIZE TABLE data FINAL")
+
+    part_path = node.query("SELECT path FROM system.parts WHERE table = 'data' and name = 'all_0_3_1'").strip()
+
+    assert len(part_path) != 0
+
+    node.query("detach table data")
+    new_path = part_path[:-6]+"1_2_3"
+    node.exec_in_container(
+        [
+            "bash",
+            "-c",
+            "cp -r {p} {p1}".format(
+                p=part_path, p1=new_path
+            ),
+        ],
+        privileged=True,
+    )
+
+    # mock empty part
+    node.exec_in_container(
+        [
+            "bash",
+            "-c",
+            "echo -n 0 > {p1}/count.txt".format(
+                p1=new_path
+            ),
+        ],
+        privileged=True,
+    )
+
+    node.query("attach table data")
+    data_size = node.query("SELECT sum(key) FROM data").strip()
+    assert(data_size == "5")

From 2effd82fc2e06380bf575cbde0375daf52a2dc7a Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Tue, 30 Apr 2024 20:24:37 +0200
Subject: [PATCH 108/651] better style

---
 .../test_intersecting_parts/test.py          | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/tests/integration/test_intersecting_parts/test.py b/tests/integration/test_intersecting_parts/test.py
index 18485e68578..3a9732f22de 100644
--- a/tests/integration/test_intersecting_parts/test.py
+++ b/tests/integration/test_intersecting_parts/test.py
@@ -2,9 +2,11 @@ import pytest
 import logging
 
 from helpers.cluster import ClickHouseCluster
+
 cluster = ClickHouseCluster(__file__)
 node = cluster.add_instance("node", with_zookeeper=True)
 
+
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
@@ -15,18 +17,19 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
+
 # This test constructs intersecting parts intentionally. It's not an elegant test.
 # TODO(hanfei): write a test which selects part 1_1 merging with part 2_2 and a drop range. 
def test_intersect_parts_when_restart(started_cluster): node.query( - """ + """ CREATE TABLE data ( key Int ) ENGINE = ReplicatedMergeTree('/ch/tables/default/data', 'node') ORDER BY key; """ - ) + ) node.query("system stop cleanup data") node.query("INSERT INTO data values (1)") node.query("INSERT INTO data values (2)") @@ -36,19 +39,19 @@ def test_intersect_parts_when_restart(started_cluster): node.query("ALTER TABLE data DROP PART 'all_2_2_0'") node.query("OPTIMIZE TABLE data FINAL") - part_path = node.query("SELECT path FROM system.parts WHERE table = 'data' and name = 'all_0_3_1'").strip() + part_path = node.query( + "SELECT path FROM system.parts WHERE table = 'data' and name = 'all_0_3_1'" + ).strip() assert len(part_path) != 0 node.query("detach table data") - new_path = part_path[:-6]+"1_2_3" + new_path = part_path[:-6] + "1_2_3" node.exec_in_container( [ "bash", "-c", - "cp -r {p} {p1}".format( - p=part_path, p1=new_path - ), + "cp -r {p} {p1}".format(p=part_path, p1=new_path), ], privileged=True, ) @@ -58,13 +61,11 @@ def test_intersect_parts_when_restart(started_cluster): [ "bash", "-c", - "echo -n 0 > {p1}/count.txt".format( - p1=new_path - ), + "echo -n 0 > {p1}/count.txt".format(p1=new_path), ], privileged=True, ) node.query("attach table data") data_size = node.query("SELECT sum(key) FROM data").strip() - assert(data_size == "5") + assert data_size == "5" From df92f422376173ba93228760d5c210dc21b4c128 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Apr 2024 18:45:19 +0000 Subject: [PATCH 109/651] Fix tests, improve dynamic/variantElement functions, add more comments --- src/Columns/ColumnArray.cpp | 2 +- src/Columns/ColumnConst.cpp | 9 ------- src/Columns/ColumnConst.h | 2 -- src/Columns/ColumnDynamic.cpp | 9 +++---- src/Columns/ColumnDynamic.h | 19 ++++++++----- src/Columns/ColumnMap.cpp | 2 +- src/Columns/ColumnNullable.cpp | 2 +- src/Columns/ColumnSparse.cpp | 2 +- src/Columns/ColumnTuple.cpp | 2 +- src/Columns/ColumnVariant.cpp | 2 +- src/Columns/IColumn.h | 3 +++ src/DataTypes/DataTypeDynamic.h | 3 +++ src/DataTypes/Serializations/ISerialization.h | 3 ++- .../SerializationDynamicElement.cpp | 3 +++ .../SerializationDynamicElement.h | 2 +- .../SerializationVariantElement.cpp | 4 +-- src/Functions/dynamicElement.cpp | 26 ++++++------------ src/Functions/dynamicType.cpp | 14 +++++++--- src/Functions/variantElement.cpp | 27 +++++++------------ src/Interpreters/TreeRewriter.cpp | 9 ++----- src/Interpreters/convertFieldToType.cpp | 3 --- src/Parsers/ParserDataType.cpp | 5 +++- src/Processors/Formats/IOutputFormat.h | 3 +-- src/Processors/Merges/Algorithms/MergedData.h | 3 +++ .../Transforms/ColumnGathererTransform.cpp | 3 +++ src/Storages/ColumnsDescription.cpp | 3 +++ .../MergeTree/MergeTreeReaderWide.cpp | 1 - src/Storages/MergeTree/MergeTreeSettings.h | 1 - .../0_stateless/02941_variant_type_4.sh | 2 +- .../03038_nested_dynamic_merges.reference | 10 +++---- .../03038_nested_dynamic_merges.sh | 8 +++--- .../03039_dynamic_all_merge_algorithms_1.sh | 12 ++++----- 32 files changed, 98 insertions(+), 101 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 29773492dc9..b8e2a541f5f 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -1289,7 +1289,7 @@ size_t ColumnArray::getNumberOfDimensions() const return 1 + nested_array->getNumberOfDimensions(); /// Every modern C++ compiler optimizes tail recursion. 
} -void ColumnArray::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnArray::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { Columns nested_source_columns; nested_source_columns.reserve(source_columns.size()); diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index cf3f448516c..f2cea83db0e 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -159,15 +159,6 @@ void ColumnConst::compareColumn( std::fill(compare_results.begin(), compare_results.end(), res); } -void ColumnConst::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) -{ - Columns nested_source_columns; - nested_source_columns.reserve(source_columns.size()); - for (const auto & source_column : source_columns) - nested_source_columns.push_back(assert_cast(*source_column).getDataColumnPtr()); - data->takeDynamicStructureFromSourceColumns(nested_source_columns); -} - ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value) { auto data = column->cloneEmpty(); diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 042468cbbcc..c2c0fa3027c 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -308,8 +308,6 @@ public: bool isCollationSupported() const override { return data->isCollationSupported(); } bool hasDynamicStructure() const override { return data->hasDynamicStructure(); } - - void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; }; ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value); diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index f3dff01af25..a1dd60f4748 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -65,14 +65,14 @@ bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) if (variant_info.variant_names.size() >= max_dynamic_types) { /// ColumnDynamic can have max_dynamic_types number of variants only when it has String as a variant. - /// Otherwise we won't be able to add cast new variants to Strings. + /// Otherwise we won't be able to cast new variants to Strings. if (!variant_info.variant_name_to_discriminator.contains("String")) throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no String variant exists"); return false; } - /// If we have max_dynamic_types - 1 number of variants and don't have String variant, we can add only String variant. + /// If we have (max_dynamic_types - 1) number of variants and don't have String variant, we can add only String variant. if (variant_info.variant_names.size() == max_dynamic_types - 1 && new_variant->getName() != "String" && !variant_info.variant_name_to_discriminator.contains("String")) return false; @@ -218,7 +218,7 @@ void ColumnDynamic::insert(const DB::Field & x) return; /// If we cannot insert field into current variant column, extend it with new variant for this field from its type. - if (likely(addNewVariant(applyVisitor(FieldToDataType(), x)))) + if (addNewVariant(applyVisitor(FieldToDataType(), x))) { /// Now we should be able to insert this field into extended variant column. variant_column->insert(x); @@ -566,7 +566,6 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) } /// We reached maximum number of variants and couldn't add new variant. - /// This case should be really rare in real use cases. /// We should always be able to add String variant and cast inserted value to String. 
addStringVariant();
     /// Create temporary column of this variant type and deserialize value into it.
@@ -645,7 +644,7 @@ ColumnPtr ColumnDynamic::compress() const
     });
 }
 
-void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns)
+void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns)
 {
     if (!empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "takeDynamicStructureFromSourceColumns should be called only on empty Dynamic column");
diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h
index b5167f4b9d9..4e9c7edd5f9 100644
--- a/src/Columns/ColumnDynamic.h
+++ b/src/Columns/ColumnDynamic.h
@@ -22,15 +22,18 @@ namespace DB
 class ColumnDynamic final : public COWHelper, ColumnDynamic>
 {
 public:
+    /// Statistics about sizes of variants stored in this Dynamic column.
     struct Statistics
     {
         enum class Source
         {
-            READ,
-            MERGE,
+            READ,  /// Statistics were loaded into column during reading from MergeTree.
+            MERGE, /// Statistics were calculated during merge of several MergeTree parts.
         };
 
+        /// Source of the statistics.
         Source source;
+        /// Statistics data: (variant name) -> (total variant size in data part).
         std::unordered_map data;
     };
 
@@ -42,9 +45,9 @@ private:
         DataTypePtr variant_type;
         /// Name of the whole variant to not call getName() every time.
         String variant_name;
-        /// Store names of variants to not call getName() every time on variants.
+        /// Names of variants to not call getName() every time on variants.
         Names variant_names;
-        /// Store mapping (variant name) -> (global discriminator).
+        /// Mapping (variant name) -> (global discriminator).
         /// It's used during variant extension.
         std::unordered_map variant_name_to_discriminator;
     };
@@ -335,7 +338,7 @@ private:
     /// Combine current variant with the other variant and return global discriminators mapping
     /// from other variant to the combined one. It's used for inserting from
     /// different variants.
-    /// Returns nullptr if maximum number of Variants is reached and the new Variant cannot be created.
+    /// Returns nullptr if maximum number of variants is reached and the new variant cannot be created.
     std::vector * combineVariants(const VariantInfo & other_variant_info);
 
     void updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type);
@@ -343,7 +346,7 @@ private:
     WrappedPtr variant_column;
     /// Store the type of current variant with some additional information.
     VariantInfo variant_info;
-    /// Maximum number of different types that can be stored in Dynamic.
+    /// The maximum number of different types that can be stored in this Dynamic column.
     /// If exceeded, all new variants will be converted to String.
     size_t max_dynamic_types;
 
@@ -351,7 +354,11 @@ private:
     /// Used in takeDynamicStructureFromSourceColumns and set during deserialization.
     Statistics statistics;
 
+    /// Cache (Variant name) -> (global discriminators mapping from this variant to current variant in Dynamic column).
+    /// Used to avoid recalculating the mappings in combineVariants for the same Variant types.
     std::unordered_map> variant_mappings_cache;
+    /// Cache of Variant types that couldn't be combined with current variant in Dynamic column.
+    /// Used to avoid checking if combination is possible for the same Variant types. 
std::unordered_set variants_with_failed_combination; }; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 48e8bced23a..eecea1a273f 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -312,7 +312,7 @@ ColumnPtr ColumnMap::compress() const }); } -void ColumnMap::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnMap::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { Columns nested_source_columns; nested_source_columns.reserve(source_columns.size()); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 011f3702bdf..bb0e15d39ab 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -835,7 +835,7 @@ ColumnPtr ColumnNullable::getNestedColumnWithDefaultOnNull() const return res; } -void ColumnNullable::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnNullable::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { Columns nested_source_columns; nested_source_columns.reserve(source_columns.size()); diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 80e20bb7631..d54801b6e07 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -801,7 +801,7 @@ ColumnSparse::Iterator ColumnSparse::getIterator(size_t n) const return Iterator(offsets_data, _size, current_offset, n); } -void ColumnSparse::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnSparse::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { Columns values_source_columns; values_source_columns.reserve(source_columns.size()); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 4e8e4063157..19f74048d84 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -582,7 +582,7 @@ bool ColumnTuple::hasDynamicStructure() const return false; } -void ColumnTuple::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { std::vector nested_source_columns; nested_source_columns.resize(columns.size()); diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 819491f7fd9..ec47f5dfa74 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1539,7 +1539,7 @@ bool ColumnVariant::hasDynamicStructure() const return false; } -void ColumnVariant::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnVariant::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { std::vector variants_source_columns; variants_source_columns.resize(variants.size()); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 33f398474ed..76f5af5bcd7 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -534,7 +534,10 @@ public: return res; } + /// Checks if column has dynamic subcolumns. virtual bool hasDynamicStructure() const { return false; } + /// For columns with dynamic subcolumns this method takes dynamic structure from source columns + /// and creates proper resulting dynamic structure in advance for merge of these source columns. virtual void takeDynamicStructureFromSourceColumns(const std::vector & /*source_columns*/) {} /** Some columns can contain another columns inside. 
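
The wrapper columns above all implement the new `takeDynamicStructureFromSourceColumns` hook with the same delegation pattern: unwrap each source column, recurse into the nested column, and let the leaf `ColumnDynamic` decide the resulting structure. A minimal sketch of that pattern, with a hypothetical `WrapperColumn` standing in for the single-nested wrappers (`ColumnArray`, `ColumnMap`, `ColumnNullable`, `ColumnSparse`; each real wrapper reaches its nested column through its own accessor):

```cpp
/// Sketch only: WrapperColumn and getNestedColumnPtr() are placeholders, not
/// actual ClickHouse classes; the real wrappers in the diffs above use
/// getDataPtr(), getNestedColumnPtr(), getValuesPtr() and so on.
void WrapperColumn::takeDynamicStructureFromSourceColumns(const Columns & source_columns)
{
    Columns nested_source_columns;
    nested_source_columns.reserve(source_columns.size());
    for (const auto & source_column : source_columns)
        nested_source_columns.push_back(assert_cast<const WrapperColumn &>(*source_column).getNestedColumnPtr());

    /// The recursion bottoms out in ColumnDynamic, which merges the variant
    /// sets (and merge statistics) of all sources into the structure that the
    /// merged result column will use.
    nested->takeDynamicStructureFromSourceColumns(nested_source_columns);
}
```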
diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h index 452e05061a0..9fc727fd9c8 100644 --- a/src/DataTypes/DataTypeDynamic.h +++ b/src/DataTypes/DataTypeDynamic.h @@ -8,6 +8,8 @@ namespace DB { +/// Dynamic type allows to store values of any type inside it and to read +/// subcolumns with any type without knowing all of them in advance. class DataTypeDynamic final : public IDataType { public: @@ -28,6 +30,7 @@ public: Field getDefault() const override; + /// 2 Dynamic types with different max_dynamic_types parameters are considered as different. bool equals(const IDataType & rhs) const override { if (const auto * rhs_dynamic_type = typeid_cast(&rhs)) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index b233230f9cc..914ff9cf4a2 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -151,7 +151,8 @@ public: /// For types with dynamic subcolumns deserialize state contains information /// about current dynamic structure. And this information can be useful - /// when we call enumerateStreams to enumerate dynamic streams. + /// when we call enumerateStreams after deserializeBinaryBulkStatePrefix + /// to enumerate dynamic streams. DeserializeBinaryBulkStatePtr deserialize_state; }; diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 059a7d57e4e..b0a4e63d0a5 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -97,6 +97,9 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { + if (!state) + return; + auto * dynamic_element_state = checkAndGetState(state); if (dynamic_element_state->variant_serialization) diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h index 9e4980e0a27..2ddc3324139 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.h +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -10,7 +10,7 @@ namespace DB class SerializationDynamicElement final : public SerializationWrapper { private: - /// To be able to deserialize Dyna,ic element as a subcolumn + /// To be able to deserialize Dynamic element as a subcolumn /// we need its type name and global discriminator. String dynamic_element_name; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index dc7fc3b9b35..1f9a81ac671 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -45,8 +45,8 @@ void SerializationVariantElement::enumerateStreams( .withColumn(data.column ? removeNullableOrLowCardinalityNullable(data.column) : nullptr) .withSerializationInfo(data.serialization_info) .withDeserializeState(deserialize_state ? 
deserialize_state->variant_element_state : nullptr); - settings.path.back().data = data; - nested_serialization->enumerateStreams(settings, callback, data); + settings.path.back().data = nested_data; + nested_serialization->enumerateStreams(settings, callback, nested_data); removeVariantFromPath(settings.path); } diff --git a/src/Functions/dynamicElement.cpp b/src/Functions/dynamicElement.cpp index 6752a61b6c3..202533dc5c8 100644 --- a/src/Functions/dynamicElement.cpp +++ b/src/Functions/dynamicElement.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,7 @@ public: getName(), arguments[0].type->getName()); - auto return_type = makeNullableOrLowCardinalityNullableSafe(getRequestedElementType(arguments[1].column)); + auto return_type = makeNullableOrLowCardinalityNullableSafe(getRequestedType(arguments[1].column)); for (; count_arrays; --count_arrays) return_type = std::make_shared(return_type); @@ -97,29 +98,18 @@ public: } const ColumnDynamic * input_col_as_dynamic = checkAndGetColumn(input_col); - if (!input_col_as_dynamic) + const DataTypeDynamic * input_type_as_dynamic = checkAndGetDataType(input_type); + if (!input_col_as_dynamic || !input_type_as_dynamic) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Dynamic or array of Dynamics. Actual {}", getName(), input_arg.type->getName()); - auto element_type = getRequestedElementType(arguments[1].column); - const auto & variant_info = input_col_as_dynamic->getVariantInfo(); - auto it = variant_info.variant_name_to_discriminator.find(element_type->getName()); - if (it == variant_info.variant_name_to_discriminator.end()) - { - auto result_type = makeNullableOrLowCardinalityNullableSafe(element_type); - auto result_column = result_type->createColumn(); - result_column->insertManyDefaults(input_rows_count); - return wrapInArraysAndConstIfNeeded(std::move(result_column), array_offsets, input_arg_is_const, input_rows_count); - } - - const auto & variant_column = input_col_as_dynamic->getVariantColumn(); - auto subcolumn_creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), element_type->getName(), it->second, variant_column.localDiscriminatorByGlobal(it->second)); - auto result_column = subcolumn_creator.create(variant_column.getVariantPtrByGlobalDiscriminator(it->second)); - return wrapInArraysAndConstIfNeeded(std::move(result_column), array_offsets, input_arg_is_const, input_rows_count); + auto type = getRequestedType(arguments[1].column); + auto subcolumn = input_type_as_dynamic->getSubcolumn(type->getName(), input_col_as_dynamic->getPtr()); + return wrapInArraysAndConstIfNeeded(std::move(subcolumn), array_offsets, input_arg_is_const, input_rows_count); } private: - DataTypePtr getRequestedElementType(const ColumnPtr & type_name_column) const + DataTypePtr getRequestedType(const ColumnPtr & type_name_column) const { const auto * name_col = checkAndGetColumnConst(type_name_column.get()); if (!name_col) diff --git a/src/Functions/dynamicType.cpp b/src/Functions/dynamicType.cpp index 8fb2974ceff..e8ca73597d6 100644 --- a/src/Functions/dynamicType.cpp +++ b/src/Functions/dynamicType.cpp @@ -21,7 +21,7 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; namespace { -/// Return enum with type name for each row in Dynamic column. +/// Return String with type name for each row in Dynamic column. 
class FunctionDynamicType : public IFunction { public: @@ -89,13 +89,21 @@ REGISTER_FUNCTION(DynamicType) Returns the variant type name for each row of `Dynamic` column. If row contains NULL, it returns 'None' for it. )", .syntax = {"dynamicType(variant)"}, - .arguments = {{"variant", "Variant column"}}, + .arguments = {{"dynamic", "Dynamic column"}}, .examples = {{{ "Example", R"( +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d) FROM test; )", R"( - +┌─d─────────────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ Hello, World! │ String │ +│ [1,2,3] │ Array(Int64) │ +└───────────────┴────────────────┘ )"}}}, .categories{"Variant"}, }); diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp index e63afc68b34..80d34083d9d 100644 --- a/src/Functions/variantElement.cpp +++ b/src/Functions/variantElement.cpp @@ -112,18 +112,15 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Variant or array of Variants. Actual {}", getName(), input_arg.type->getName()); - std::optional variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); + auto variant_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); - if (!variant_global_discr.has_value()) + if (!variant_discr) return arguments[2].column; - auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); - const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr); - const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr); - auto subcolumn_creator = SerializationVariantElement::VariantSubcolumnCreator(input_col_as_variant->getLocalDiscriminatorsPtr(), variant_type->getName(), *variant_global_discr, variant_local_discr); - auto res = subcolumn_creator.create(variant_column); - return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); + auto variant_column = input_type_as_variant->getSubcolumn(input_type_as_variant->getVariant(*variant_discr)->getName(), input_col_as_variant->getPtr()); + return wrapInArraysAndConstIfNeeded(std::move(variant_column), array_offsets, input_arg_is_const, input_rows_count); } + private: std::optional getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const { @@ -133,20 +130,16 @@ private: "Second argument to {} with Variant argument must be a constant String", getName()); - String variant_element_name = name_col->getValue(); - auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name); - if (variant_element_type) + auto variant_element_name = name_col->getValue(); + if (auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name)) { - const auto & variants = variant_type.getVariants(); - for (size_t i = 0; i != variants.size(); ++i) - { - if (variants[i]->getName() == variant_element_type->getName()) - return i; - } + if (auto discr = variant_type.tryGetVariantDiscriminator(variant_element_type->getName())) + return discr; } if (argument_size == 2) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} doesn't contain variant with type {}", variant_type.getName(), variant_element_name); + return std::nullopt; } diff --git 
a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index a6cb378243a..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -2,7 +2,7 @@ #include #include -//#include +#include #include #include @@ -1188,27 +1188,22 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } + /// Check for dynamic subcolums in unknown required columns. if (!unknown_required_source_columns.empty()) { - for (const NameAndTypePair & pair : source_columns_ordinary) { -// std::cerr << "Check ordinary column " << pair.name << "\n"; if (!pair.type->hasDynamicSubcolumns()) continue; -// std::cerr << "Check dyamic subcolumns\n"; - for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) { auto [column_name, dynamic_subcolumn_name] = Nested::splitName(*it); -// std::cerr << "Check dyamic subcolumn " << dynamic_subcolumn_name << "\n"; if (column_name == pair.name) { if (auto dynamic_subcolumn_type = pair.type->tryGetSubcolumnType(dynamic_subcolumn_name)) { -// std::cerr << "Found\n"; source_columns.emplace_back(*it, dynamic_subcolumn_type); it = unknown_required_source_columns.erase(it); continue; diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 30b7de409f1..9363e3d83eb 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -27,7 +27,6 @@ #include #include #include -#include namespace DB @@ -167,8 +166,6 @@ Field convertDecimalType(const Field & from, const To & type) Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint) { - checkStackSize(); - if (from_type_hint && from_type_hint->equals(type)) { return src; diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 747a9a6f7ba..573430ae9ab 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -7,12 +7,14 @@ #include #include + namespace DB { namespace { +/// Parser of Dynamic type arguments: Dynamic(max_types=N) class DynamicArgumentsParser : public IParserBase { private: @@ -47,7 +49,8 @@ private: /// - Nested table elements; /// - Enum element in form of 'a' = 1; /// - literal; -/// - another data type (or identifier) +/// - Dynamic type arguments; +/// - another data type (or identifier); class ParserDataTypeArgument : public IParserBase { public: diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 9996bedb20e..cae2ab7691e 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -105,8 +105,6 @@ public: } } - virtual void finalizeBuffers() {} - protected: friend class ParallelFormattingOutputFormat; @@ -124,6 +122,7 @@ protected: virtual void consumeTotals(Chunk) {} virtual void consumeExtremes(Chunk) {} virtual void finalizeImpl() {} + virtual void finalizeBuffers() {} virtual void writePrefix() {} virtual void writeSuffix() {} virtual void resetFormatterImpl() {} diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 95f915e4478..c5bb074bb0c 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -99,6 +99,9 @@ public: { columns[i] = columns[i]->cloneResized(num_rows); } + /// For columns with Dynamic structure we cannot just take column from input chunk because resulting column may have + /// different Dynamic 
structure (and have some merge statistics after calling takeDynamicStructureFromSourceColumns). + /// We should insert into data resulting column using insertRangeFrom. else if (columns[i]->hasDynamicStructure()) { columns[i] = columns[i]->cloneEmpty(); diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index 6736cd59e83..b6bcec26c0c 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -60,6 +60,9 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() if (source_to_fully_copy) /// Was set on a previous iteration { Chunk res; + /// For columns with Dynamic structure we cannot just take column source_to_fully_copy because resulting column may have + /// different Dynamic structure (and have some merge statistics after calling takeDynamicStructureFromSourceColumns). + /// We should insert into data resulting column using insertRangeFrom. if (result_column->hasDynamicStructure()) { auto col = result_column->cloneEmpty(); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 6f844e31970..3a3ee0d1d14 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -550,6 +550,7 @@ bool ColumnsDescription::hasSubcolumn(const String & column_name) const if (subcolumns.get<0>().count(column_name)) return true; + /// Check for dynamic subcolumns auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); auto it = columns.get<1>().find(ordinary_column_name); if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) @@ -655,6 +656,7 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns return *jt; } + /// Check for dynmaic subcolumns. auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); it = columns.get<1>().find(ordinary_column_name); if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) @@ -752,6 +754,7 @@ bool ColumnsDescription::hasColumnOrSubcolumn(GetColumnsOptions::Kind kind, cons if ((it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & kind)) || hasSubcolumn(column_name)) return true; + /// Check for dynamic subcolumns. auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); it = columns.get<1>().find(ordinary_column_name); if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 64ca6132cc4..de6b742934f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -1,5 +1,4 @@ #include -#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 375c1e37bae..a00508fd1c1 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,7 +43,6 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. 
If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ - /** M(UInt64, max_types_for_dynamic_serialization, 32, "The maximum number of different types in Dynamic column stored separately in MergeTree tables in wide format. If exceeded, new types will be converted to String", 0) */ \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index f6eaf2fcc9a..ddff3852865 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --max_insert_threads 0 --group_by_two_level_threshold 454338 --group_by_two_level_threshold_bytes 50000000 --distributed_aggregation_memory_efficient 1 --fsync_metadata 0 --output_format_parallel_formatting 0 --input_format_parallel_parsing 1 --min_chunk_bytes_for_parallel_parsing 10898151 --max_read_buffer_size 730200 --prefer_localhost_replica 1 --max_block_size 77643 --max_threads 18 --optimize_append_index 0 --optimize_if_chain_to_multiif 0 --optimize_if_transform_strings_to_enum 0 --optimize_read_in_order 0 --optimize_or_like_chain 0 --optimize_substitute_columns 0 --enable_multiple_prewhere_read_steps 0 --read_in_order_two_level_merge_threshold 20 --optimize_aggregation_in_order 1 --aggregation_in_order_max_block_bytes 39857781 --use_uncompressed_cache 1 --min_bytes_to_use_direct_io 1 --min_bytes_to_use_mmap_io 10737418240 --local_filesystem_read_method io_uring --remote_filesystem_read_method threadpool --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 10 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 1 --throw_on_error_from_cache_on_write_operations 1 --remote_filesystem_read_prefetch 0 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 128Mi --filesystem_prefetches_limit 0 --filesystem_prefetch_min_bytes_for_single_read_task 8Mi --filesystem_prefetch_step_marks 0 --filesystem_prefetch_step_bytes 100Mi --compile_aggregate_expressions 0 --compile_sort_description 0 --merge_tree_coarse_index_granularity 30 --optimize_distinct_in_order 1 --max_bytes_before_external_sort 10737418240 --max_bytes_before_external_group_by 1 --max_bytes_before_remerge_sort 2279999838 --min_compress_block_size 56847 --max_compress_block_size 2399536 --merge_tree_compact_parts_min_granules_to_multibuffer_read 39 --optimize_sorting_by_input_stream_properties 1 --http_response_buffer_size 2739586 --http_wait_end_of_query False --enable_memory_bound_merging_of_aggregation_results 1 --min_count_to_compile_expression 3 --min_count_to_compile_aggregate_expression 0 --min_count_to_compile_sort_description 3 --session_timezone America/Mazatlan --prefer_warmed_unmerged_parts_seconds 7 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.19 --ratio_of_defaults_for_sparse_serialization 0.0 
--prefer_fetch_merged_part_size_threshold 1 --vertical_merge_algorithm_min_rows_to_activate 389696 --vertical_merge_algorithm_min_columns_to_activate 100 --allow_vertical_merges_from_compact_to_wide_parts 0 --min_merge_bytes_to_use_direct_io 10737418240 --index_granularity_bytes 16233524 --merge_max_block_size 6455 --index_granularity 16034 --min_bytes_for_wide_part 0 --compress_marks 0 --compress_primary_key 0 --marks_compress_block_size 15959 --primary_key_compress_block_size 70269 --replace_long_file_name_to_hash 1 --max_file_name_length 123 --min_bytes_for_full_part_storage 0 --compact_parts_max_bytes_to_buffer 511937149 --compact_parts_max_granules_to_buffer 142 --compact_parts_merge_max_bytes_to_prefetch_part 28443027 --cache_populated_by_fetch 0 --concurrent_part_removal_threshold 0 --old_parts_lifetime 480" function test6_insert() { diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference index f8118ce8b95..65034647775 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference @@ -2,8 +2,8 @@ MergeTree compact + horizontal merge test 16667 Tuple(a Dynamic(max_types=3)):Date 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 100000 UInt64:None 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) 50000 Tuple(a Dynamic(max_types=3)):UInt64 @@ -25,8 +25,8 @@ MergeTree wide + horizontal merge test 16667 Tuple(a Dynamic(max_types=3)):Date 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 100000 UInt64:None 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) 50000 Tuple(a Dynamic(max_types=3)):UInt64 @@ -40,8 +40,8 @@ test 100000 UInt64:None 133333 Tuple(a Dynamic(max_types=3)):None 50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None 100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None 116667 Tuple(a Dynamic(max_types=3)):String 133333 Tuple(a Dynamic(max_types=3)):None MergeTree compact + vertical merge @@ -59,8 +59,8 @@ test 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) 50000 Tuple(a Dynamic(max_types=3)):UInt64 66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None 100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None 133333 Tuple(a Dynamic(max_types=3)):None 50000 Tuple(a Dynamic(max_types=3)):UInt64 100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) @@ -86,7 +86,7 @@ test 100000 UInt64:None 133333 Tuple(a Dynamic(max_types=3)):None 50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None 100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None 116667 Tuple(a Dynamic(max_types=3)):String 133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh index afb167ec20d..b82ddb3813e 100755 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -18,16 +18,16 @@ function test() $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, 
toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" } $CH_CLIENT -q "drop table if exists test;" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh index 3384a135307..9298fe28fec 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -18,9 +18,9 @@ function test() $CH_CLIENT -q "insert into test select number, number from numbers(100000)" $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "drop table test" echo "SummingMergeTree" @@ -29,10 +29,10 @@ function test() $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select count(), sum from test group by sum" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - 
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select count(), sum from test group by sum" $CH_CLIENT -q "drop table test" @@ -42,10 +42,10 @@ function test() $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" $CH_CLIENT -q "drop table test" } From c9b019d392c4fa3e2f25a2921383711fc2c93ce5 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Apr 2024 18:46:38 +0000 Subject: [PATCH 110/651] Mark ColumnDynamic constructor explicit --- src/Columns/ColumnDynamic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 4e9c7edd5f9..c6626433877 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -52,7 +52,7 @@ private: std::unordered_map variant_name_to_discriminator; }; - ColumnDynamic(size_t max_dynamic_types_); + explicit ColumnDynamic(size_t max_dynamic_types_); ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}); public: From 3b9f593524ba27105864464f41d8b3e858d163f9 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Apr 2024 19:00:32 +0000 Subject: [PATCH 111/651] Fix type in code, add more docs --- docs/en/sql-reference/data-types/dynamic.md | 256 +++++++++++++++++++- src/Storages/ColumnsDescription.cpp | 2 +- 2 files changed, 256 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index e20bdad1e79..e3cade25b55 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -106,6 +106,7 @@ SELECT toTypeName(d.String), toTypeName(d.Int64), toTypeName(d.`Array(Int64)`), ```sql SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;``` +``` ```text ┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐ @@ -139,7 +140,7 @@ SELECT dynamicType(d) from test; There are 4 possible conversions that can be performed with `Dynamic` column. 
-### Converting an ordinary column to a Variant column
+### Converting an ordinary column to a Dynamic column
 
 ```sql
 SELECT 'Hello, World!'::Dynamic as d, dynamicType(d);
@@ -151,7 +152,260 @@ SELECT 'Hello, World!'::Dynamic as d, dynamicType(d);
 └───────────────┴────────────────┘
 ```
 
+### Converting a String column to a Dynamic column through parsing
+
+To parse `Dynamic` type values from a `String` column you can enable the setting `cast_string_to_dynamic_use_inference`:
+
+```sql
+SET cast_string_to_dynamic_use_inference = 1;
+SELECT CAST(materialize(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01')), 'Map(String, Dynamic)') as map_of_dynamic, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamic) as map_of_dynamic_types;
+```
+
+```text
+┌─map_of_dynamic──────────────────────────────┬─map_of_dynamic_types─────────────────────────┐
+│ {'key1':42,'key2':true,'key3':'2020-01-01'} │ {'key1':'Int64','key2':'Bool','key3':'Date'} │
+└─────────────────────────────────────────────┴──────────────────────────────────────────────┘
+```
+
+### Converting a Dynamic column to an ordinary column
+
+It is possible to convert a `Dynamic` column to an ordinary column. In this case all nested types will be converted to the destination type:
+
+```sql
+CREATE TABLE test (d Dynamic) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), ('42.42'), (true), ('e10');
+SELECT d::Nullable(Float64) FROM test;
+```
+
+```text
+┌─CAST(d, 'Nullable(Float64)')─┐
+│                         ᴺᵁᴸᴸ │
+│                           42 │
+│                        42.42 │
+│                            1 │
+│                            0 │
+└──────────────────────────────┘
+```
+
+### Converting a Variant column to a Dynamic column
+
+```sql
+CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), ('String'), ([1, 2, 3]);
+SELECT v::Dynamic as d, dynamicType(d) from test;
+```
+
+```text
+┌─d───────┬─dynamicType(d)─┐
+│ ᴺᵁᴸᴸ    │ None           │
+│ 42      │ UInt64         │
+│ String  │ String         │
+│ [1,2,3] │ Array(UInt64)  │
+└─────────┴────────────────┘
+```
+
+### Converting a Dynamic(max_types=N) column to another Dynamic(max_types=K)
+
+If `K >= N`, then the data doesn't change during the conversion:
+
+```sql
+CREATE TABLE test (d Dynamic(max_types=3)) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true);
+SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test;
+```
+
+```text
+┌─d2────┬─dynamicType(d2)─┐
+│ ᴺᵁᴸᴸ  │ None            │
+│ 42    │ Int64           │
+│ 43    │ Int64           │
+│ 42.42 │ String          │
+│ true  │ Bool            │
+└───────┴─────────────────┘
+```
+
+If `K < N`, then the values with the rarest types are converted to `String`:
+
+```sql
+CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
+SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test;
+```
+
+```text
+┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
+│ ᴺᵁᴸᴸ    │ None           │ ᴺᵁᴸᴸ    │ None            │
+│ 42      │ Int64          │ 42      │ Int64           │
+│ 43      │ Int64          │ 43      │ Int64           │
+│ 42.42   │ String         │ 42.42   │ String          │
+│ true    │ Bool           │ true    │ String          │
+│ [1,2,3] │ Array(Int64)   │ [1,2,3] │ String          │
+└─────────┴────────────────┴─────────┴─────────────────┘
+```
+
+If `K=1`, all types are converted to `String`:
+
+```sql
+CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
+SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test;
+```
+
+```text
+┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
+│ ᴺᵁᴸᴸ    │ None           │ ᴺᵁᴸᴸ    │ None            │
+│ 42      │ Int64          │ 42      │ String          │
+│ 43      │ Int64          │ 43      │ String          │
+│ 42.42   │ String         │ 42.42   │ String          │
+│ true    │ Bool           │ true    │ String          │
+│ [1,2,3] │ Array(Int64)   │ [1,2,3] │ String          │
+└─────────┴────────────────┴─────────┴─────────────────┘
+```
+
+## Reading Dynamic type from the data
+
+All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc.) support reading `Dynamic` type. During data parsing ClickHouse tries to infer the type of each value and uses it during insertion into the `Dynamic` column.
+
+Example:
+
+```sql
+SELECT
+    d,
+    dynamicType(d),
+    dynamicElement(d, 'String') AS str,
+    dynamicElement(d, 'Int64') AS num,
+    dynamicElement(d, 'Float64') AS float,
+    dynamicElement(d, 'Date') AS date,
+    dynamicElement(d, 'Array(Int64)') AS arr
+FROM format(JSONEachRow, 'd Dynamic', $$
+{"d" : "Hello, World!"},
+{"d" : 42},
+{"d" : 42.42},
+{"d" : "2020-01-01"},
+{"d" : [1, 2, 3]}
+$$)
+```
+
+```text
+┌─d─────────────┬─dynamicType(d)─┬─str───────────┬──num─┬─float─┬───────date─┬─arr─────┐
+│ Hello, World! │ String         │ Hello, World! │ ᴺᵁᴸᴸ │  ᴺᵁᴸᴸ │       ᴺᵁᴸᴸ │ []      │
+│ 42            │ Int64          │ ᴺᵁᴸᴸ          │   42 │  ᴺᵁᴸᴸ │       ᴺᵁᴸᴸ │ []      │
+│ 42.42         │ Float64        │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ │ 42.42 │       ᴺᵁᴸᴸ │ []      │
+│ 2020-01-01    │ Date           │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ │  ᴺᵁᴸᴸ │ 2020-01-01 │ []      │
+│ [1,2,3]       │ Array(Int64)   │ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ │  ᴺᵁᴸᴸ │       ᴺᵁᴸᴸ │ [1,2,3] │
+└───────────────┴────────────────┴───────────────┴──────┴───────┴────────────┴─────────┘
+```
+
+## Comparing values of Dynamic type
+
+Values of `Dynamic` type are compared similarly to values of `Variant` type:
+The result of operator `<` for values `d1` with underlying type `T1` and `d2` with underlying type `T2` of type `Dynamic` is defined as follows:
+- If `T1 = T2 = T`, the result will be `d1.T < d2.T` (underlying values will be compared).
+- If `T1 != T2`, the result will be `T1 < T2` (type names will be compared).
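+
+For instance, under these rules an `Int64` value always sorts before a `String` value, because the type names are compared first. A minimal illustration (hypothetical one-liners, assumed to behave like the column examples below):
+
+```sql
+SELECT 42::Dynamic < 'abc'::Dynamic; -- 1: different types, so 'Int64' < 'String' as names
+SELECT 43::Dynamic < 42::Dynamic;    -- 0: same type Int64, so the values are compared
+```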
+
+Examples:
+
+```sql
+CREATE TABLE test (d1 Dynamic, d2 Dynamic) ENGINE=Memory;
+INSERT INTO test VALUES (42, 42), (42, 43), (42, 'abc'), (42, [1, 2, 3]), (42, []), (42, NULL);
+```
+
+```sql
+SELECT d2, dynamicType(d2) as d2_type from test order by d2;
+```
+
+```text
+┌─d2──────┬─d2_type──────┐
+│ []      │ Array(Int64) │
+│ [1,2,3] │ Array(Int64) │
+│ 42      │ Int64        │
+│ 43      │ Int64        │
+│ abc     │ String       │
+│ ᴺᵁᴸᴸ    │ None         │
+└─────────┴──────────────┘
+```
+
+```sql
+SELECT d1, dynamicType(d1) as d1_type, d2, dynamicType(d2) as d2_type, d1 = d2, d1 < d2, d1 > d2 from test;
+```
+
+```text
+┌─d1─┬─d1_type─┬─d2──────┬─d2_type──────┬─equals(d1, d2)─┬─less(d1, d2)─┬─greater(d1, d2)─┐
+│ 42 │ Int64   │ 42      │ Int64        │              1 │            0 │               0 │
+│ 42 │ Int64   │ 43      │ Int64        │              0 │            1 │               0 │
+│ 42 │ Int64   │ abc     │ String       │              0 │            1 │               0 │
+│ 42 │ Int64   │ [1,2,3] │ Array(Int64) │              0 │            0 │               1 │
+│ 42 │ Int64   │ []      │ Array(Int64) │              0 │            0 │               1 │
+│ 42 │ Int64   │ ᴺᵁᴸᴸ    │ None         │              0 │            1 │               0 │
+└────┴─────────┴─────────┴──────────────┴────────────────┴──────────────┴─────────────────┘
+```
+
+If you need to find rows with a specific `Dynamic` value, you can do one of the following:
+
+- Cast the value to the `Dynamic` type:
+
+```sql
+SELECT * FROM test WHERE d2 == [1,2,3]::Array(Int64)::Dynamic;
+```
+
+```text
+┌─d1─┬─d2──────┐
+│ 42 │ [1,2,3] │
+└────┴─────────┘
+```
+
+- Compare the `Dynamic` subcolumn with the required type:
+
+```sql
+SELECT * FROM test WHERE d2.`Array(Int64)` == [1,2,3] -- or using dynamicElement(d2, 'Array(Int64)')
+```
+
+```text
+┌─d1─┬─d2──────┐
+│ 42 │ [1,2,3] │
+└────┴─────────┘
+```
+
+Sometimes it can be useful to make an additional check on the dynamic type, because subcolumns with complex types like `Array/Map/Tuple` cannot be inside `Nullable` and will contain default values instead of `NULL` on rows with other types:
+
+```sql
+SELECT d2, d2.`Array(Int64)`, dynamicType(d2) FROM test WHERE d2.`Array(Int64)` == [];
+```
+
+```text
+┌─d2───┬─d2.Array(Int64)─┬─dynamicType(d2)─┐
+│ 42   │ []              │ Int64           │
+│ 43   │ []              │ Int64           │
+│ abc  │ []              │ String          │
+│ []   │ []              │ Array(Int64)    │
+│ ᴺᵁᴸᴸ │ []              │ None            │
+└──────┴─────────────────┴─────────────────┘
+```
+
+```sql
+SELECT d2, d2.`Array(Int64)`, dynamicType(d2) FROM test WHERE dynamicType(d2) == 'Array(Int64)' AND d2.`Array(Int64)` == [];
+```
+
+```text
+┌─d2─┬─d2.Array(Int64)─┬─dynamicType(d2)─┐
+│ [] │ []              │ Array(Int64)    │
+└────┴─────────────────┴─────────────────┘
+```
+
+**Note:** values of `Dynamic` type with different numeric types are considered as different values and are not compared with each other; their type names are compared instead.
+
+Example:
+
+```sql
+CREATE TABLE test (d Dynamic) ENGINE=Memory;
+INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64);
+SELECT d, dynamicType(d) FROM test ORDER BY d;
+```
+
+```text
+┌─d───┬─dynamicType(d)─┐
+│ 1   │ Int64          │
+│ 100 │ Int64          │
+│ 1   │ UInt32         │
+│ 100 │ UInt32         │
+└─────┴────────────────┘
+```
diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp
index 3a3ee0d1d14..4cf66649ad1 100644
--- a/src/Storages/ColumnsDescription.cpp
+++ b/src/Storages/ColumnsDescription.cpp
@@ -656,7 +656,7 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns
         return *jt;
     }
 
-    /// Check for dynmaic subcolumns.
+    /// Check for dynamic subcolumns.
     auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name);
     it = columns.get<1>().find(ordinary_column_name);
     if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns())

From 05ba142ba1a0437d4dd114df7770e48a73c78ec3 Mon Sep 17 00:00:00 2001
From: Han Fei 
Date: Wed, 1 May 2024 13:00:27 +0200
Subject: [PATCH 112/651] fix tests

---
 src/Storages/MergeTree/MergeTreeData.cpp | 42 +++++++++++++-----------
 src/Storages/MergeTree/MergeTreeData.h   |  4 +--
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index bbf89a87282..05cc28e30e5 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1142,7 +1142,14 @@ Int64 MergeTreeData::getMaxBlockNumber() const
     return max_block_num;
 }
 
-void MergeTreeData::PartLoadingTree::add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk)
+inline bool isSuspectedDroppedRange(std::optional<std::unordered_set<std::string>> expected_parts, const MergeTreePartInfo & info, const MergeTreePartInfo & prev_info)
+{
+    if (expected_parts == std::nullopt || expected_parts->contains(info.getPartNameV1()))
+        return false;
+    return info.min_block > prev_info.min_block && info.max_block < prev_info.max_block;
+}
+
+void MergeTreeData::PartLoadingTree::add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk, std::optional<std::unordered_set<std::string>> expected_parts)
 {
     auto & current_ptr = root_by_partition[info.partition_id];
     if (!current_ptr)
@@ -1164,6 +1171,13 @@ void MergeTreeData::PartLoadingTree::add(const MergeTreePartInfo & info, const S
     }
     else if (!prev_info.isDisjoint(info))
     {
+        if (isSuspectedDroppedRange(expected_parts, info, prev_info))
+        {
+            LOG_INFO(getLogger("PartLoadingTree"), "Found part {} is covered by {} but its level is higher. It was possibly dropped before the restart.",
+                info.getPartNameV1(), prev_info.getPartNameV1());
+            current = prev->second.get();
+            continue;
+        }
         throw Exception(ErrorCodes::LOGICAL_ERROR,
             "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the server or ZooKeeper data",
             name, prev->second->name);
@@ -1209,16 +1223,18 @@ void MergeTreeData::PartLoadingTree::traverse(bool recursive, Func && func)
 }
 
 MergeTreeData::PartLoadingTree
-MergeTreeData::PartLoadingTree::build(PartLoadingInfos nodes)
+MergeTreeData::PartLoadingTree::build(PartLoadingInfos nodes, std::optional<std::unordered_set<std::string>> expected_parts)
 {
     std::sort(nodes.begin(), nodes.end(), [](const auto & lhs, const auto & rhs)
     {
-        return std::tie(lhs.info.level, lhs.info.mutation) > std::tie(rhs.info.level, rhs.info.mutation);
+        /// If a part is dropped by a drop-range, it's possible for it to be covered by a part of a smaller level.
+        /// In that case we use max_block - min_block, which is more accurate.
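+        /// Parts are sorted in descending order of this key so that covering parts are added to the tree before the parts they cover.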
+ return std::make_pair(lhs.info.max_block-lhs.info.min_block, lhs.info.mutation) > std::make_pair(rhs.info.max_block-rhs.info.min_block, rhs.info.mutation); }); PartLoadingTree tree; for (const auto & [info, name, disk] : nodes) - tree.add(info, name, disk); + tree.add(info, name, disk, expected_parts); return tree; } @@ -1656,7 +1672,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional parts_to_load_by_disk(disks.size()); - std::vector unexpected_parts_to_load_by_disk(disks.size()); ThreadPoolCallbackRunnerLocal runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); @@ -1667,7 +1682,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), format_version)) - { - if (expected_parts && !expected_parts->contains(it->name())) - unexpected_disk_parts.emplace_back(*part_info, it->name(), disk_ptr); - else - disk_parts.emplace_back(*part_info, it->name(), disk_ptr); - } + disk_parts.emplace_back(*part_info, it->name(), disk_ptr); } }, Priority{0}); } @@ -1695,11 +1704,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional(info, name, disk)); - loading_tree.traverse(/*recursive=*/ true, [&](const auto & node) { if (!node->is_loaded) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 3b7ff1454a5..1fff6ed1ebf 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1519,7 +1519,7 @@ protected: using PartLoadingInfos = std::vector; /// Builds a tree from the list of part infos. - static PartLoadingTree build(PartLoadingInfos nodes); + static PartLoadingTree build(PartLoadingInfos nodes, std::optional> expected_parts); /// Traverses a tree and call @func on each node. /// If recursive is false traverses only the top level. @@ -1529,7 +1529,7 @@ protected: private: /// NOTE: Parts should be added in descending order of their levels /// because rearranging tree to the new root is not supported. 
- void add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk); + void add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk, std::optional> expected_parts); std::unordered_map root_by_partition; }; From 1275b2a1b47ddd8f1804ba0783e96aca5566eb27 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 1 May 2024 13:18:08 +0200 Subject: [PATCH 113/651] fix style --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 05cc28e30e5..ccb35cb697d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1227,7 +1227,7 @@ MergeTreeData::PartLoadingTree::build(PartLoadingInfos nodes, std::optional std::make_pair(rhs.info.max_block-rhs.info.min_block, rhs.info.mutation); }); From b6aab2d02adb6d13428355316e1dffc7e8cd610d Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 1 May 2024 13:41:01 +0200 Subject: [PATCH 114/651] fix fast test --- src/Storages/MergeTree/MergeTreeData.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ccb35cb697d..ba924920836 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1228,8 +1228,9 @@ MergeTreeData::PartLoadingTree::build(PartLoadingInfos nodes, std::optional std::make_pair(rhs.info.max_block-rhs.info.min_block, rhs.info.mutation); + /// So we compare max_block - min_block first, for equal block range, we compare the level in case of ttl. + return std::make_tuple(lhs.info.max_block-lhs.info.min_block, lhs.info.level, lhs.info.mutation) > + std::make_tuple(rhs.info.max_block-rhs.info.min_block, rhs.info.level, rhs.info.mutation); }); PartLoadingTree tree; From b057b54d24f948c10f532e413bf59fb404f69045 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 2 May 2024 11:24:33 +0000 Subject: [PATCH 115/651] ValidateGroupByColumnsVisitor::areColumnSourcesEqual --- src/Analyzer/ValidationUtils.cpp | 57 ++++++++++++++++++- .../03130_analyzer_self_join_group_by.sql | 5 ++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index 60cc1dd521f..3cda2d071f3 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -134,6 +134,52 @@ public: } private: + + static bool areColumnSourcesEqual(const QueryTreeNodePtr & lhs, const QueryTreeNodePtr & rhs) + { + using NodePair = std::pair; + std::vector nodes_to_process; + nodes_to_process.emplace_back(lhs.get(), rhs.get()); + + while (!nodes_to_process.empty()) + { + const auto [lhs_node, rhs_node] = nodes_to_process.back(); + nodes_to_process.pop_back(); + + if (lhs_node->getNodeType() != rhs_node->getNodeType()) + return false; + + if (lhs_node->getNodeType() == QueryTreeNodeType::COLUMN) + { + auto * lhs_column_node = lhs_node->as(); + auto * rhs_column_node = rhs_node->as(); + if (!lhs_column_node->getColumnSource()->isEqual(*rhs_column_node->getColumnSource())) + return false; + } + + const auto & lhs_children = lhs_node->getChildren(); + const auto & rhs_children = rhs_node->getChildren(); + if (lhs_children.size() != rhs_children.size()) + return false; + + for (size_t i = 0; i < lhs_children.size(); ++i) + { + const auto & lhs_child = lhs_children[i]; + const auto & rhs_child = rhs_children[i]; + + if (!lhs_child && !rhs_child) + continue; 
+ else if (lhs_child && !rhs_child) + return false; + else if (!lhs_child && rhs_child) + return false; + + nodes_to_process.emplace_back(lhs_child.get(), rhs_child.get()); + } + } + return true; + } + bool nodeIsAggregateFunctionOrInGroupByKeys(const QueryTreeNodePtr & node) const { if (auto * function_node = node->as()) @@ -141,8 +187,17 @@ private: return true; for (const auto & group_by_key_node : group_by_keys_nodes) + { if (node->isEqual(*group_by_key_node, {.compare_aliases = false})) - return true; + { + /** Column sources shoul be compared with aliases for correct GROUP BY keys validation, + * otherwise t2.x and t1.x will be considered as the same column: + * SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; + */ + if (areColumnSourcesEqual(node, group_by_key_node)) + return true; + } + } return false; } diff --git a/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql index 562855ad954..66b6b99981b 100644 --- a/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql +++ b/tests/queries/0_stateless/03130_analyzer_self_join_group_by.sql @@ -5,6 +5,11 @@ INSERT INTO t1 VALUES (1), (2), (3); SET allow_experimental_analyzer = 1; SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.number FROM numbers(10) as t1 JOIN numbers(10) as t2 ON t1.number = t2.number GROUP BY t1.number; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.a FROM (SELECT x as a FROM t1) as t1 JOIN (SELECT x as a FROM t1) as t2 ON t1.a = t2.a GROUP BY t1.a; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.a FROM (SELECT x as a FROM t1 UNION ALL SELECT x as a FROM t1) as t1 JOIN (SELECT x as a FROM t1 UNION ALL SELECT x as a FROM t1) as t2 ON t1.a = t2.a GROUP BY t1.a; -- { serverError NOT_AN_AGGREGATE } +SELECT t2.number FROM numbers(10) JOIN numbers(10) as t2 ON number = t2.number GROUP BY number SETTINGS joined_subquery_requires_alias = 0; -- { serverError NOT_AN_AGGREGATE } + SELECT t2.x FROM t1 as t0 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; -- { serverError NOT_AN_AGGREGATE } SELECT t2.x FROM t1 as t0 JOIN t1 as t2 ON t0.x = t2.x GROUP BY t0.x; -- { serverError NOT_AN_AGGREGATE } SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY x; -- { serverError NOT_AN_AGGREGATE } From a8c931ac986580722bb7d2bcbdc3c697c91242d2 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 May 2024 15:16:40 +0200 Subject: [PATCH 116/651] fix typo --- src/Analyzer/ValidationUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index 3cda2d071f3..682ab55a20e 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -190,7 +190,7 @@ private: { if (node->isEqual(*group_by_key_node, {.compare_aliases = false})) { - /** Column sources shoul be compared with aliases for correct GROUP BY keys validation, + /** Column sources should be compared with aliases for correct GROUP BY keys validation, * otherwise t2.x and t1.x will be considered as the same column: * SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; */ From 2c22593205dfc0843200395c4ac5782112b055e3 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 May 2024 18:34:14 +0200 Subject: [PATCH 117/651] fix style --- src/Analyzer/ValidationUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index 
682ab55a20e..03a9679668a 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -151,8 +151,8 @@ private: if (lhs_node->getNodeType() == QueryTreeNodeType::COLUMN) { - auto * lhs_column_node = lhs_node->as(); - auto * rhs_column_node = rhs_node->as(); + const auto * lhs_column_node = lhs_node->as(); + const auto * rhs_column_node = rhs_node->as(); if (!lhs_column_node->getColumnSource()->isEqual(*rhs_column_node->getColumnSource())) return false; } From ff6fa4bf6e414caa7cd483a3155d38187ceaf3f5 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 3 May 2024 17:03:16 +0000 Subject: [PATCH 118/651] fix unit tests for asyncloader --- src/Common/tests/gtest_async_loader.cpp | 36 ++++++++++++++++--------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 174997ddf14..304fa996934 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -262,7 +262,8 @@ TEST(AsyncLoader, CancelPendingJob) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -288,7 +289,8 @@ TEST(AsyncLoader, CancelPendingTask) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try @@ -298,7 +300,8 @@ TEST(AsyncLoader, CancelPendingTask) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -325,7 +328,8 @@ TEST(AsyncLoader, CancelPendingDependency) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try @@ -335,7 +339,8 @@ TEST(AsyncLoader, CancelPendingDependency) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -451,8 +456,9 @@ TEST(AsyncLoader, JobFailure) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_FAILED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains(error_message)); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_FAILED")); } } @@ -489,8 +495,9 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); + ASSERT_TRUE(e.message().contains(error_message)); } try { @@ -499,8 +506,9 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); + ASSERT_TRUE(e.message().contains(error_message)); } } @@ -531,7 +539,8 @@ TEST(AsyncLoader, 
ScheduleJobWithCanceledDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try { @@ -540,7 +549,8 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } From c90e04ed4be9c6f8cf274eabf9f0d10c27102c83 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 6 May 2024 11:40:45 +0000 Subject: [PATCH 119/651] fix tests build --- src/Common/tests/gtest_async_loader.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 304fa996934..9fda58b9008 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -35,6 +35,7 @@ namespace DB::ErrorCodes extern const int ASYNC_LOAD_CYCLE; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; + extern const int ASYNC_LOAD_WAIT_FAILED; } struct Initializer { From ed63ad5e61916f38fa8db2143aaa755d49665584 Mon Sep 17 00:00:00 2001 From: tomershafir Date: Tue, 7 May 2024 14:10:49 +0300 Subject: [PATCH 120/651] iouring: refactor get from context --- src/Coordination/Standalone/Context.cpp | 3 +- src/Disks/IO/IOUringReader.cpp | 2 +- src/Disks/IO/createReadBufferFromFileBase.cpp | 11 +---- src/Disks/IO/getIOUringReader.cpp | 40 +++++++++++++++++++ src/Disks/IO/getIOUringReader.h | 19 +++++++++ src/Interpreters/Context.cpp | 3 +- src/Storages/StorageFile.cpp | 6 +-- 7 files changed, 68 insertions(+), 16 deletions(-) create mode 100644 src/Disks/IO/getIOUringReader.cpp create mode 100644 src/Disks/IO/getIOUringReader.h diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 1095a11566f..84e54ed7100 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -306,7 +307,7 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co IOUringReader & Context::getIOURingReader() const { callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = std::make_unique(512); + shared->io_uring_reader = createIOUringReader(); }); return *shared->io_uring_reader; diff --git a/src/Disks/IO/IOUringReader.cpp b/src/Disks/IO/IOUringReader.cpp index 90a4d285ecb..6b0e3f8cc89 100644 --- a/src/Disks/IO/IOUringReader.cpp +++ b/src/Disks/IO/IOUringReader.cpp @@ -1,5 +1,4 @@ #include "IOUringReader.h" -#include #if USE_LIBURING @@ -13,6 +12,7 @@ #include #include #include +#include namespace ProfileEvents { diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index a9d451496ff..f3bb6ae1740 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -4,9 +4,9 @@ #include #include #include +#include #include #include -#include #include #include #include "config.h" @@ -100,14 +100,7 @@ std::unique_ptr createReadBufferFromFileBase( else if (settings.local_fs_method == LocalFSReadMethod::io_uring) { #if USE_LIBURING - auto global_context = Context::getGlobalContextInstance(); - if (!global_context) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot obtain io_uring reader (global context not 
initialized)"); - - auto & reader = global_context->getIOURingReader(); - if (!reader.isSupported()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); - + auto & reader = getIOURingReaderOrThrow(); res = std::make_unique( reader, settings.priority, diff --git a/src/Disks/IO/getIOUringReader.cpp b/src/Disks/IO/getIOUringReader.cpp new file mode 100644 index 00000000000..8e9a9655a41 --- /dev/null +++ b/src/Disks/IO/getIOUringReader.cpp @@ -0,0 +1,40 @@ +#include "getIOUringReader.h" + +#if USE_LIBURING + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; +} + +std::unique_ptr createIOUringReader() +{ + return std::make_unique(512); +} + +IOUringReader & getIOUringReaderOrThrow(ContextPtr context) +{ + auto reader = context->getIOUringReader(); + if (!reader.isSupported) + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); + } + return reader; +} + +IOUringReader & getIOUringReaderOrThrow() +{ + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + return getIOUringReaderOrThrow(context) +} + +} +#endif diff --git a/src/Disks/IO/getIOUringReader.h b/src/Disks/IO/getIOUringReader.h new file mode 100644 index 00000000000..0980f32b5a2 --- /dev/null +++ b/src/Disks/IO/getIOUringReader.h @@ -0,0 +1,19 @@ +#pragma once + +#include "config.h" + +#if USE_LIBURING + +#include + +namespace DB +{ + +std::unique_ptr createIOUringReader(); + +IOUringReader & getIOUringReaderOrThrow(ContextPtr); + +IOUringReader & getIOUringReaderOrThrow(); + +} +#endif diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 44d36e94441..d847cab013c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -5178,7 +5179,7 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co IOUringReader & Context::getIOURingReader() const { callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = std::make_unique(512); + shared->io_uring_reader = createIOUringReader() }); return *shared->io_uring_reader; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 430e68d8562..9bead6d0267 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -282,10 +283,7 @@ std::unique_ptr selectReadBuffer( else if (read_method == LocalFSReadMethod::io_uring && !use_table_fd) { #if USE_LIBURING - auto & reader = context->getIOURingReader(); - if (!reader.isSupported()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); - + auto & reader = getIOURingReaderOrThrow(context); res = std::make_unique( reader, Priority{}, From 936f94d286f50133cf12ba449245502769a22e40 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 7 May 2024 14:40:45 +0200 Subject: [PATCH 121/651] Add print --- utils/keeper-bench/Runner.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 8b111f5adb9..a893dac3851 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -635,11 +635,14 @@ struct ZooKeeperRequestFromLogReader break; } case Coordination::OpNum::Check: + case 
Coordination::OpNum::CheckNotExists: { auto check_request = std::make_shared(); check_request->path = current_block->getPath(idx_in_block); if (auto version = current_block->getVersion(idx_in_block)) check_request->version = *version; + if (op_num == Coordination::OpNum::CheckNotExists) + check_request->not_exists = true; request_from_log.request = check_request; break; } @@ -868,10 +871,20 @@ void Runner::runBenchmarkFromLog() } ZooKeeperRequestFromLogReader request_reader(input_request_log, global_context); + + delay_watch.restart(); while (auto request_from_log = request_reader.getNextRequest()) { request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); push_request(std::move(*request_from_log)); + + if (delay > 0 && delay_watch.elapsedSeconds() > delay) + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + std::cerr << std::endl; + delay_watch.restart(); + } } } From f57abbd806ef78be7829844e3f285b994661ca5e Mon Sep 17 00:00:00 2001 From: tomershafir Date: Tue, 7 May 2024 17:27:52 +0300 Subject: [PATCH 122/651] add missing include --- src/Disks/IO/getIOUringReader.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/IO/getIOUringReader.h b/src/Disks/IO/getIOUringReader.h index 0980f32b5a2..ca619785ab4 100644 --- a/src/Disks/IO/getIOUringReader.h +++ b/src/Disks/IO/getIOUringReader.h @@ -5,6 +5,7 @@ #if USE_LIBURING #include +#include namespace DB { From c99f15a843fb8cae05a111f90b185433224481ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 7 May 2024 17:36:55 +0200 Subject: [PATCH 123/651] Insert QueryFinish on AsyncInsertFlush with no data --- src/Interpreters/AsynchronousInsertQueue.cpp | 47 ++++++++------- ...sync_queries_in_query_log_errors.reference | 44 ++++++++++++++ ...03148_async_queries_in_query_log_errors.sh | 60 +++++++++++++++++++ 3 files changed, 129 insertions(+), 22 deletions(-) create mode 100644 tests/queries/0_stateless/03148_async_queries_in_query_log_errors.reference create mode 100755 tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 0bad3f7ed16..ab29c64184d 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -667,11 +667,7 @@ namespace using TimePoint = std::chrono::time_point; void appendElementsToLogSafe( - AsynchronousInsertLog & log, - std::vector elements, - TimePoint flush_time, - const String & flush_query_id, - const String & flush_exception) + AsynchronousInsertLog & log, std::vector elements, TimePoint flush_time, const String & flush_exception) try { using Status = AsynchronousInsertLogElement::Status; @@ -680,7 +676,6 @@ try { elem.flush_time = timeInSeconds(flush_time); elem.flush_time_microseconds = timeInMicroseconds(flush_time); - elem.flush_query_id = flush_query_id; elem.exception = flush_exception; elem.status = flush_exception.empty() ? 
Status::Ok : Status::FlushError; log.add(std::move(elem)); @@ -808,12 +803,12 @@ try throw; } - auto add_entry_to_log = [&](const auto & entry, - const auto & entry_query_for_logging, - const auto & exception, - size_t num_rows, - size_t num_bytes, - Milliseconds timeout_ms) + auto add_entry_to_asynchronous_insert_log = [&](const auto & entry, + const auto & entry_query_for_logging, + const auto & exception, + size_t num_rows, + size_t num_bytes, + Milliseconds timeout_ms) { if (!async_insert_log) return; @@ -831,6 +826,7 @@ try elem.exception = exception; elem.data_kind = entry->chunk.getDataKind(); elem.timeout_milliseconds = timeout_ms.count(); + elem.flush_query_id = insert_query_id; /// If there was a parsing error, /// the entry won't be flushed anyway, @@ -857,7 +853,7 @@ try if (!log_elements.empty()) { auto flush_time = std::chrono::system_clock::now(); - appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, ""); + appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, ""); } }; @@ -865,15 +861,27 @@ try auto header = pipeline.getHeader(); if (key.data_kind == DataKind::Parsed) - chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_log); + chunk = processEntriesWithParsing(key, data, header, insert_context, log, add_entry_to_asynchronous_insert_log); else - chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_log); + chunk = processPreprocessedEntries(key, data, header, insert_context, add_entry_to_asynchronous_insert_log); ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows()); + auto log_and_add_finish_to_query_log = [&](size_t num_rows, size_t num_bytes) + { + LOG_DEBUG(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str); + queue_shard_flush_time_history.updateWithCurrentTime(); + + bool pulling_pipeline = false; + logQueryFinish( + query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal); + }; + + if (chunk.getNumRows() == 0) { finish_entries(); + log_and_add_finish_to_query_log(0, 0); return; } @@ -888,12 +896,7 @@ try CompletedPipelineExecutor completed_executor(pipeline); completed_executor.execute(); - LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str); - - queue_shard_flush_time_history.updateWithCurrentTime(); - - bool pulling_pipeline = false; - logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal); + log_and_add_finish_to_query_log(num_rows, num_bytes); } catch (...) 
{ @@ -903,7 +906,7 @@ try { auto exception = getCurrentExceptionMessage(false); auto flush_time = std::chrono::system_clock::now(); - appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, exception); + appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, exception); } throw; } diff --git a/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.reference b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.reference new file mode 100644 index 00000000000..685d28268f6 --- /dev/null +++ b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.reference @@ -0,0 +1,44 @@ + +system.asynchronous_insert_log +Row 1: +────── +database: default +table: async_insert_landing +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values +format: Values +error: DB::Exc*****on: Cannot parse string 'Invalid' as UInt32: +populated_flush_query_id: 1 +system.query_log +Row 1: +────── +type: QueryStart +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing'] +columns: [] +views: [] +exception_code: 0 + +Row 2: +────── +type: QueryFinish +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 0, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing'] +columns: [] +views: [] +exception_code: 0 diff --git a/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh new file mode 100755 index 00000000000..9c290133bf9 --- /dev/null +++ b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +function print_flush_query_logs() +{ + ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + echo "" + + echo "system.asynchronous_insert_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + database, + table, + query, + format, + extract(replace(exception::String, 'Exception', 'Exc*****on'), '.*UInt32:') as error, + not empty(flush_query_id) as populated_flush_query_id + FROM system.asynchronous_insert_log + WHERE + event_date >= yesterday() + AND query_id = '$1' + AND database = currentDatabase() + FORMAT Vertical" + + echo "system.query_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + replace(type::String, 'Exception', 'Exc*****on') as type, + read_rows, + read_bytes, + written_rows, + written_bytes, + result_rows, + result_bytes, + query, + query_kind, + databases, + tables, + columns, + views, + exception_code + FROM system.query_log + WHERE + event_date >= yesterday() + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') + -- AND current_database = currentDatabase() -- Just to silence style check: this is not ok for this test since the query uses default values + ORDER BY type DESC + FORMAT Vertical" +} + + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_insert_landing (id UInt32) ENGINE = MergeTree ORDER BY id" + +query_id="$(random_str 10)" +${CLICKHOUSE_CLIENT} --query_id="${query_id}" -q "INSERT INTO async_insert_landing SETTINGS wait_for_async_insert=0, async_insert=1 values ('Invalid')" 2>/dev/null || true +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" +print_flush_query_logs ${query_id} From 13b312092c4f2b06092403dc6db0077e700f88d2 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 7 May 2024 23:54:49 +0200 Subject: [PATCH 124/651] rewrite load part logic --- src/Common/CurrentMetrics.cpp | 3 + src/IO/SharedThreadPools.cpp | 6 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 27 ++ src/Storages/MergeTree/IMergeTreeDataPart.h | 2 + src/Storages/MergeTree/MergeTreeData.cpp | 234 +++++++++++++++--- src/Storages/MergeTree/MergeTreeData.h | 32 ++- src/Storages/StorageReplicatedMergeTree.cpp | 36 ++- 7 files changed, 280 insertions(+), 60 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 0f25397a961..34e9daad488 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -177,6 +177,9 @@ M(MergeTreeOutdatedPartsLoaderThreads, "Number of threads in the threadpool for loading Outdated data parts.") \ M(MergeTreeOutdatedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \ M(MergeTreeOutdatedPartsLoaderThreadsScheduled, "Number of queued or active jobs in the threadpool for loading Outdated data parts.") \ + M(MergeTreeUnexpectedPartsLoaderThreads, "Number of threads in the threadpool for loading Unexpected data parts.") \ + M(MergeTreeUnexpectedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Unexpected data parts.") \ + M(MergeTreeUnexpectedPartsLoaderThreadsScheduled, "Number of queued or active jobs in the threadpool for loading Unexpected data parts.") \ M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \ M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \ M(MergeTreePartsCleanerThreadsScheduled, "Number of queued or active jobs in the MergeTree parts cleaner thread pool.") \ diff --git a/src/IO/SharedThreadPools.cpp 
b/src/IO/SharedThreadPools.cpp index 2ea30400ad9..da5ed0cbe67 100644 --- a/src/IO/SharedThreadPools.cpp +++ b/src/IO/SharedThreadPools.cpp @@ -151,6 +151,12 @@ StaticThreadPool & getOutdatedPartsLoadingThreadPool() return instance; } +StaticThreadPool & getUnexpectedPartsLoadingThreadPool() +{ + static StaticThreadPool instance("MergeTreeUnexpectedPartsLoaderThreadPool", CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreads, CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreadsActive, CurrentMetrics::MergeTreeUnexpectedPartsLoaderThreadsScheduled); + return instance; +} + StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool() { static StaticThreadPool instance("CreateTablesThreadPool", CurrentMetrics::DatabaseReplicatedCreateTablesThreads, CurrentMetrics::DatabaseReplicatedCreateTablesThreadsActive, CurrentMetrics::DatabaseReplicatedCreateTablesThreadsScheduled); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 441437855ab..8f3840b9a3f 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1257,6 +1257,33 @@ void IMergeTreeDataPart::appendFilesOfChecksums(Strings & files) files.push_back("checksums.txt"); } +void IMergeTreeDataPart::loadRowsCountFileForUnexpectedPart() +{ + auto read_rows_count = [&]() + { + auto buf = metadata_manager->read("count.txt"); + readIntText(rows_count, *buf); + assertEOF(*buf); + }; + if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part) + { + if(metadata_manager->exists("count.txt")) + { + read_rows_count(); + return; + } + } + else + { + if (getDataPartStorage().exists("count.txt")) + { + read_rows_count(); + return; + } + } + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No count.txt in part {}", name); +} + void IMergeTreeDataPart::loadRowsCount() { auto read_rows_count = [&]() diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 7519980a7a3..2524f5f55dd 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -183,6 +183,8 @@ public: void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency); void appendFilesOfColumnsChecksumsIndexes(Strings & files, bool include_projection = false) const; + void loadRowsCountFileForUnexpectedPart(); + String getMarksFileExtension() const { return index_granularity_info.mark_type.getFileExtension(); } /// Generate the new name for this part according to `new_part_info` and min/max dates from the old name. 
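To make the MergeTreeData changes below easier to follow: while scanning disks, loadDataParts now routes every on-disk part whose name is absent from the expected set (e.g. the part names a replica expects according to ZooKeeper) into a separate list that a background task loads afterwards, instead of inspecting and detaching it inline during startup. The following is a minimal, self-contained sketch of just that classification step; FoundPart and classifyParts are illustrative names, not ClickHouse APIs.

#include <iostream>
#include <set>
#include <string>
#include <vector>

/// Illustrative stand-in for a part directory discovered on disk.
struct FoundPart
{
    std::string name;
};

/// Split discovered parts into "expected" ones (present in the expected set)
/// and "unexpected" ones, which are handed off to a background loading task.
void classifyParts(
    const std::vector<FoundPart> & found,
    const std::set<std::string> & expected_names,
    std::vector<FoundPart> & expected_out,
    std::vector<FoundPart> & unexpected_out)
{
    for (const auto & part : found)
    {
        if (expected_names.contains(part.name))
            expected_out.push_back(part);
        else
            unexpected_out.push_back(part);
    }
}

int main()
{
    std::vector<FoundPart> found = {{"all_1_1_0"}, {"all_2_2_0"}, {"all_1_2_1"}};
    std::set<std::string> expected = {"all_1_1_0", "all_2_2_0"};

    std::vector<FoundPart> expected_out;
    std::vector<FoundPart> unexpected_out;
    classifyParts(found, expected, expected_out, unexpected_out);

    /// Prints "expected: 2, unexpected: 1".
    std::cout << "expected: " << expected_out.size()
              << ", unexpected: " << unexpected_out.size() << std::endl;
}

Loading the unexpected parts on a dedicated pool keeps leftovers of interrupted merges and drops from blocking startup; per the diffs below, only the replicated sanity check (checkPartsImpl) synchronously waits for that pass via waitForUnexpectedPartsToBeLoaded.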
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ba924920836..cad0f8aaa4a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1142,14 +1142,7 @@ Int64 MergeTreeData::getMaxBlockNumber() const return max_block_num; } -inline bool isSuspectedDroppedRage(std::optional> expected_parts, const MergeTreePartInfo & info, const MergeTreePartInfo & prev_info) -{ - if (expected_parts == std::nullopt || expected_parts->contains(info.getPartNameV1())) - return false; - return info.min_block > prev_info.min_block && info.max_block < prev_info.max_block; -} - -void MergeTreeData::PartLoadingTree::add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk, std::optional> expected_parts) +void MergeTreeData::PartLoadingTree::add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk) { auto & current_ptr = root_by_partition[info.partition_id]; if (!current_ptr) @@ -1171,13 +1164,6 @@ void MergeTreeData::PartLoadingTree::add(const MergeTreePartInfo & info, const S } else if (!prev_info.isDisjoint(info)) { - if (isSuspectedDroppedRage(expected_parts, info, prev_info)) - { - LOG_INFO(getLogger("PartLoadingTree"), "Found part {} is covered by {} but it's level is higher. It's possible to be dropped before restart.", - info.getPartNameV1(), prev_info.getPartNameV1()); - current = prev->second.get(); - continue; - } throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the server or ZooKeeper data", name, prev->second->name); @@ -1223,19 +1209,16 @@ void MergeTreeData::PartLoadingTree::traverse(bool recursive, Func && func) } MergeTreeData::PartLoadingTree -MergeTreeData::PartLoadingTree::build(PartLoadingInfos nodes, std::optional> expected_parts) +MergeTreeData::PartLoadingTree::build(PartLoadingInfos nodes) { std::sort(nodes.begin(), nodes.end(), [](const auto & lhs, const auto & rhs) { - /// If a part is dropped by drop-range, it's possible to be covered by a smaller level part. - /// So we compare max_block - min_block first, for equal block range, we compare the level in case of ttl. 
- return std::make_tuple(lhs.info.max_block-lhs.info.min_block, lhs.info.level, lhs.info.mutation) > - std::make_tuple(rhs.info.max_block-rhs.info.min_block, rhs.info.level, rhs.info.mutation); + return std::tie(lhs.info.level, lhs.info.mutation) > std::tie(rhs.info.level, rhs.info.mutation); }); PartLoadingTree tree; for (const auto & [info, name, disk] : nodes) - tree.add(info, name, disk, expected_parts); + tree.add(info, name, disk); return tree; } @@ -1281,6 +1264,50 @@ static constexpr size_t loading_parts_initial_backoff_ms = 100; static constexpr size_t loading_parts_max_backoff_ms = 5000; static constexpr size_t loading_parts_max_tries = 3; +void MergeTreeData::loadUnexpectedDataPart(UnexpectedPartLoadState & state) +{ + const MergeTreePartInfo & part_info = state.loading_info->info; + const String & part_name = state.loading_info->name; + const DiskPtr & part_disk_ptr = state.loading_info->disk; + LOG_TRACE(log, "Loading {} unexpected part {} from disk {}", magic_enum::enum_name(to_state), part_name, part_disk_ptr->getName()); + + LoadPartResult res; + auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); + auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, part_name); + String part_path = fs::path(relative_data_path) / part_name; + + try + { + state.part = getDataPartBuilder(part_name, single_disk_volume, part_name) + .withPartInfo(part_info) + .withPartFormatFromDisk() + .build(); + + state.part->loadRowsCountFileForUnexpectedPart(); + + state.is_empty = part->isEmpty(); + } + catch (...) + { + LOG_DEBUG(log, "Failed to load data part {} with exception: {}", part_name, getExceptionMessage(std::current_exception(), false)); + if (!state.part) + { + /// Build a fake part and mark it as broken in case of filesystem error. + /// If the error impacts part directory instead of single files, + /// an exception will be thrown during detach and silently ignored. 
+ state.part = getDataPartBuilder(part_name, single_disk_volume, part_name) + .withPartStorageType(MergeTreeDataPartStorageType::Full) + .withPartType(MergeTreeDataPartType::Wide) + .build(); + } + + state.is_broken = true; + tryLogCurrentException(log, fmt::format("while loading part {} on path {}", part_name, part_path)); + + state.is_empty = calculatePartSizeSafe(res.part, log.load()) == 0; + } +} + MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( const MergeTreePartInfo & part_info, const String & part_name, @@ -1673,6 +1700,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional parts_to_load_by_disk(disks.size()); + std::vector unexpected_parts_to_load_by_disk(disks.size()); ThreadPoolCallbackRunnerLocal runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); @@ -1683,6 +1711,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), format_version)) - disk_parts.emplace_back(*part_info, it->name(), disk_ptr); + { + if (expected_parts && !expected_parts->contains(it->name())) + unexpected_disk_parts.emplace_back(*part_info, it->name(), disk_ptr); + else + disk_parts.emplace_back(*part_info, it->name(), disk_ptr); + } } }, Priority{0}); } @@ -1705,8 +1739,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional unexpected_unloaded_data_parts; + for (const auto & [info, name, disk] : unexpected_parts_to_load) + { + bool uncovered = true; + for (const auto & part : unexpected_parts_to_load) + { + if (name != part.name && !info.contains(part.info) && !info.isDisjoint(part.info)) + { + uncovered = false; + break; + } + } + unexpected_unloaded_data_parts.push_back({std::make_shared(info, name, disk), uncovered}); + } + + if (!unexpected_unloaded_data_parts.empty()) + { + LOG_DEBUG(log, "Found {} unexpected data parts. 
They will be loaded asynchronously", unexpected_data_parts.size()); + { + std::lock_guard lock(unexpected_data_parts_mutex); + unexpected_data_parts = std::move(unexpected_unloaded_data_parts); + unexpected_data_parts_loading_finished = false; + } + + unexpected_data_parts_loading_task = getContext()->getSchedulePool().createTask( + "MergeTreeData::loadUnexpectedDataParts", + [this] { loadUnexpectedDataParts(); }); + } + loading_tree.traverse(/*recursive=*/ true, [&](const auto & node) { if (!node->is_loaded) @@ -1858,6 +1925,59 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts"); + + for (auto & load_state : unexpected_data_parts) + { + std::lock_guard lock(unexpected_data_parts_mutex); + chassert(!load_state.part); + if (unexpected_data_parts_loading_canceled) + { + runner.waitForAllToFinishAndRethrowFirstError(); + return; + } + runner([&]() + { + loadUnexpectedDataPart(load_state); + + chassert(load_state.part); + if (load_state.is_broken) + { + load_state.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes + } + else + { + load_state.part->renameToDetached("ignore"); /// detached parts must not have '_' in prefixes + } + }, Priority{}); + } + runner.waitForAllToFinishAndRethrowFirstError(); + LOG_DEBUG(log, "Loaded {} unexpected data parts {}", num_loaded_parts); + + { + std::lock_guard lock(unexpected_data_parts_mutex); + unexpected_data_parts_loading_finished = true; + unexpected_data_parts_cv.notify_all(); + } +} + void MergeTreeData::loadOutdatedDataParts(bool is_async) try { @@ -1993,24 +2113,74 @@ void MergeTreeData::waitForOutdatedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ LOG_TRACE(log, "Finished waiting for outdated data parts to be loaded"); } -void MergeTreeData::startOutdatedDataPartsLoadingTask() +void MergeTreeData::waitForUnexpectedPartsToBeLoaded() const TSA_NO_THREAD_SAFETY_ANALYSIS +{ + /// Background tasks are not run if storage is static. + if (isStaticStorage()) + return; + + /// If waiting is not required, do NOT log and do NOT enable/disable turbo mode to make `waitForUnexpectedPartsToBeLoaded` a lightweight check + { + std::unique_lock lock(unexpected_data_parts_mutex); + if (unexpected_data_parts_loading_canceled) + throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of unexpected data parts was already canceled"); + if (unexpected_data_parts_loading_finished) + return; + } + + /// We need to load parts as fast as possible + getUnexpectedPartsLoadingThreadPool().enableTurboMode(); + SCOPE_EXIT({ + /// Let's lower the number of threads e.g. 
for later ATTACH queries to behave as usual + getUnexpectedPartsLoadingThreadPool().disableTurboMode(); + }); + + LOG_TRACE(log, "Will wait for unexpected data parts to be loaded"); + + std::unique_lock lock(unexpected_data_parts_mutex); + + unexpected_data_parts_cv.wait(lock, [this]() TSA_NO_THREAD_SAFETY_ANALYSIS + { + return unexpected_data_parts_loading_finished || unexpected_data_parts_loading_canceled; + }); + + if (unexpected_data_parts_loading_canceled) + throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of unexpected data parts was canceled"); + + LOG_TRACE(log, "Finished waiting for unexpected data parts to be loaded"); +} + +void MergeTreeData::startOutdatedAndUnexpectedDataPartsLoadingTask() { if (outdated_data_parts_loading_task) outdated_data_parts_loading_task->activateAndSchedule(); + if (unexpected_data_parts_load_task) + unexpected_data_parts_load_task->activateAndSchedule(); } -void MergeTreeData::stopOutdatedDataPartsLoadingTask() +void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask() { - if (!outdated_data_parts_loading_task) - return; - + if (outdated_data_parts_loading_task) { - std::lock_guard lock(outdated_data_parts_mutex); - outdated_data_parts_loading_canceled = true; + { + std::lock_guard lock(outdated_data_parts_mutex); + outdated_data_parts_loading_canceled = true; + } + + outdated_data_parts_loading_task->deactivate(); + outdated_data_parts_cv.notify_all(); } - outdated_data_parts_loading_task->deactivate(); - outdated_data_parts_cv.notify_all(); + if (unexpected_data_parts_load_task) + { + { + std::lock_guard lock(unexpected_data_parts_mutex); + unexpected_data_parts_loading_canceled = true; + } + + unexpected_data_parts_loading_task->deactivate(); + unexpected_data_parts_cv.notify_all(); + } } /// Is the part directory old. diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 1fff6ed1ebf..39fcbe61fd7 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -652,10 +652,9 @@ public: /// Renames the part to detached/_ and removes it from data_parts, //// so it will not be deleted in clearOldParts. - /// If restore_covered is true, adds to the working set inactive parts, which were merged into the deleted part. /// NOTE: This method is safe to use only for parts which nobody else holds (like on server start or for parts which was not committed). /// For active parts it's unsafe because this method modifies fields of part (rename) while some other thread can try to read it. - void forcefullyMovePartToDetachedAndRemoveFromMemory(const DataPartPtr & part, const String & prefix = "", bool restore_covered = false); + void forcefullyMovePartToDetachedAndRemoveFromMemory(const DataPartPtr & part, const String & prefix = ""); /// This method should not be here, but async loading of Outdated parts is implemented in MergeTreeData virtual void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & /*part_name*/) {} @@ -1072,6 +1071,7 @@ public: scope_guard getTemporaryPartDirectoryHolder(const String & part_dir_name) const; void waitForOutdatedPartsToBeLoaded() const; + void waitForUnexpectedPartsToBeLoaded() const; bool canUsePolymorphicParts() const; /// TODO: make enabled by default in the next release if no problems found. @@ -1519,7 +1519,7 @@ protected: using PartLoadingInfos = std::vector; /// Builds a tree from the list of part infos. 
- static PartLoadingTree build(PartLoadingInfos nodes, std::optional> expected_parts); + static PartLoadingTree build(PartLoadingInfos nodes); /// Traverses a tree and call @func on each node. /// If recursive is false traverses only the top level. @@ -1529,7 +1529,7 @@ protected: private: /// NOTE: Parts should be added in descending order of their levels /// because rearranging tree to the new root is not supported. - void add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk, std::optional> expected_parts); + void add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk); std::unordered_map root_by_partition; }; @@ -1549,13 +1549,33 @@ protected: PartLoadingTreeNodes outdated_unloaded_data_parts TSA_GUARDED_BY(outdated_data_parts_mutex); bool outdated_data_parts_loading_canceled TSA_GUARDED_BY(outdated_data_parts_mutex) = false; + mutable std::mutex unexpected_data_parts_mutex; + mutable std::condition_variable unexpected_data_parts_cv; + + struct UnexpectedPartLoadState + { + PartLoadingTree::NodePtr loading_info; + /// if it is covered by any unexpected part + bool uncovered = true; + bool is_broken = false; + bool is_empty = false; + MutableDataPartPtr part; + }; + + BackgroundSchedulePool::TaskHolder unexpected_data_parts_loading_task; + std::vector unexpected_data_parts TSA_GUARDED_BY(unexpected_data_parts_mutex); + bool unexpected_data_parts_loading_canceled TSA_GUARDED_BY(unexpected_data_parts_mutex) = false; + + void loadUnexpectedDataParts(); + void loadUnexpectedDataPart(UnexpectedPartLoadState & state); + /// This has to be "true" by default, because in case of empty table or absence of Outdated parts /// it is automatically finished. std::atomic_bool outdated_data_parts_loading_finished = true; void loadOutdatedDataParts(bool is_async); - void startOutdatedDataPartsLoadingTask(); - void stopOutdatedDataPartsLoadingTask(); + void startOutdatedAndUnexpectedDataPartsLoadingTask(); + void stopOutdatedAndUnexpectedDataPartsLoadingTask(); static void incrementInsertedPartsProfileEvent(MergeTreeDataPartType type); static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c425035dfba..0ef0f0ab878 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1576,7 +1576,7 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) * But actually we can't precisely determine that ALL missing parts * covered by this unexpected part. So missing parts will be downloaded. */ - DataParts unexpected_parts; + waitForUnexpectedPartsToBeLoaded(); /// Intersection of local parts and expected parts ActiveDataPartSet local_expected_parts_set(format_version); @@ -1584,10 +1584,7 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) /// Collect unexpected parts for (const auto & part : parts) { - if (expected_parts.contains(part->name)) - local_expected_parts_set.add(part->name); - else - unexpected_parts.insert(part); /// this parts we will place to detached with ignored_ prefix + local_expected_parts_set.add(part->name); } /// Which parts should be taken from other replicas. 
@@ -1600,16 +1597,11 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); ActiveDataPartSet set_of_empty_unexpected_parts(format_version); - for (const auto & part : parts) + for (const auto & load_state : unexpected_data_parts) { - if (part->rows_count || part->getState() != MergeTreeDataPartState::Active || expected_parts.contains(part->name)) + if (load_state.is_broken || load_state.part->rows_count || !load_state.uncovered) continue; - if (incomplete_list_of_outdated_parts) - { - LOG_INFO(log, "Outdated parts are not loaded yet, but we may need them to handle dropped parts. Need retry."); - return false; - } set_of_empty_unexpected_parts.add(part->name); } if (auto empty_count = set_of_empty_unexpected_parts.size()) @@ -1629,9 +1621,9 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) std::unordered_set restorable_unexpected_parts; UInt64 uncovered_unexpected_parts_rows = 0; - for (const auto & part : unexpected_parts) + for (const auto & load_state : unexpected_data_parts) { - unexpected_parts_rows += part->rows_count; + unexpected_parts_rows += load_state.part->rows_count; /// This part may be covered by some expected part that is active and present locally /// Probably we just did not remove this part from disk before restart (but removed from ZooKeeper) @@ -1743,12 +1735,12 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) queue.setBrokenPartsToEnqueueFetchesOnLoading(std::move(parts_to_fetch)); /// Remove extra local parts. - for (const DataPartPtr & part : unexpected_parts) - { - bool restore_covered = restorable_unexpected_parts.contains(part->name) || uncovered_unexpected_parts.contains(part->name); - LOG_ERROR(log, "Renaming unexpected part {} to ignored_{}{}", part->name, part->name, restore_covered ? ", restoring covered parts" : ""); - forcefullyMovePartToDetachedAndRemoveFromMemory(part, "ignored", restore_covered); - } + /// for (const DataPartPtr & part : unexpected_parts) + /// { + /// bool restore_covered = restorable_unexpected_parts.contains(part->name) || uncovered_unexpected_parts.contains(part->name); + /// LOG_ERROR(log, "Renaming unexpected part {} to ignored_{}{}", part->name, part->name, restore_covered ? 
", restoring covered parts" : ""); + /// forcefullyMovePartToDetachedAndRemoveFromMemory(part, "ignored", restore_covered); + /// } return true; } @@ -5138,7 +5130,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart( void StorageReplicatedMergeTree::startup() { LOG_TRACE(log, "Starting up table"); - startOutdatedDataPartsLoadingTask(); + startOutdatedAndUnexpectedDataPartsLoadingTask(); if (attach_thread) { attach_thread->start(); @@ -5341,7 +5333,7 @@ void StorageReplicatedMergeTree::shutdown(bool) } session_expired_callback_handler.reset(); - stopOutdatedDataPartsLoadingTask(); + stopOutdatedAndUnexpectedDataPartsLoadingTask(); partialShutdown(); From 116055b5329b6b3b3a08f85c92396863295f7a3c Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 8 May 2024 15:04:16 +0200 Subject: [PATCH 125/651] refine load part logic --- programs/local/LocalServer.cpp | 8 + programs/server/Server.cpp | 10 ++ src/Core/ServerSettings.h | 1 + src/IO/SharedThreadPools.cpp | 3 + src/IO/SharedThreadPools.h | 2 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 160 ++---------------- src/Storages/MergeTree/MergeTreeData.h | 4 +- src/Storages/StorageMergeTree.cpp | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 41 +++-- .../test.py | 2 +- 11 files changed, 62 insertions(+), 175 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 553b48584bc..6d1ebf8d30c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -160,6 +160,14 @@ void LocalServer::initialize(Poco::Util::Application & self) getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); + const size_t unexpected_parts_loading_threads = config().getUInt("max_unexpected_parts_loading_thread_pool_size", 32); + getUnexpectedPartsLoadingThreadPool().initialize( + unexpected_parts_loading_threads, + 0, // We don't need any threads one all the parts will be loaded + unexpected_parts_loading_threads); + + getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); + const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128); getPartsCleaningThreadPool().initialize( cleanup_threads, diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index bcfe3799818..9c9476d1aa7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -885,6 +885,16 @@ try server_settings.max_active_parts_loading_thread_pool_size ); + getUnexpectedPartsLoadingThreadPool().initialize( + server_settings.max_unexpected_parts_loading_thread_pool_size, + 0, // We don't need any threads once all the parts will be loaded + server_settings.max_unexpected_parts_loading_thread_pool_size); + + /// It could grow if we need to synchronously wait until all the data parts will be loaded. 
+ getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads( + server_settings.max_active_parts_loading_thread_pool_size + ); + getPartsCleaningThreadPool().initialize( server_settings.max_parts_cleaning_thread_pool_size, 0, // We don't need any threads one all the parts will be deleted diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index f41c596282f..04696a94fc2 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -25,6 +25,7 @@ namespace DB M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \ M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \ + M(UInt64, max_unexpected_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \ M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \ M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \ M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \ diff --git a/src/IO/SharedThreadPools.cpp b/src/IO/SharedThreadPools.cpp index da5ed0cbe67..3606ddd984c 100644 --- a/src/IO/SharedThreadPools.cpp +++ b/src/IO/SharedThreadPools.cpp @@ -20,6 +20,9 @@ namespace CurrentMetrics extern const Metric MergeTreeOutdatedPartsLoaderThreads; extern const Metric MergeTreeOutdatedPartsLoaderThreadsActive; extern const Metric MergeTreeOutdatedPartsLoaderThreadsScheduled; + extern const Metric MergeTreeUnexpectedPartsLoaderThreads; + extern const Metric MergeTreeUnexpectedPartsLoaderThreadsActive; + extern const Metric MergeTreeUnexpectedPartsLoaderThreadsScheduled; extern const Metric DatabaseReplicatedCreateTablesThreads; extern const Metric DatabaseReplicatedCreateTablesThreadsActive; extern const Metric DatabaseReplicatedCreateTablesThreadsScheduled; diff --git a/src/IO/SharedThreadPools.h b/src/IO/SharedThreadPools.h index acc5368f8ac..50adc70c9a0 100644 --- a/src/IO/SharedThreadPools.h +++ b/src/IO/SharedThreadPools.h @@ -64,6 +64,8 @@ StaticThreadPool & getPartsCleaningThreadPool(); /// the number of threads by calling enableTurboMode() :-) StaticThreadPool & getOutdatedPartsLoadingThreadPool(); +StaticThreadPool & getUnexpectedPartsLoadingThreadPool(); + /// ThreadPool used for creating tables in DatabaseReplicated. 
StaticThreadPool & getDatabaseReplicatedCreateTablesThreadPool(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e1810554cce..a65c8a61da1 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1270,7 +1270,7 @@ void IMergeTreeDataPart::loadRowsCountFileForUnexpectedPart() }; if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part) { - if(metadata_manager->exists("count.txt")) + if (metadata_manager->exists("count.txt")) { read_rows_count(); return; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6ac7ed51580..b5e5a1b93fc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1317,7 +1317,7 @@ void MergeTreeData::loadUnexpectedDataPart(UnexpectedPartLoadState & state) const MergeTreePartInfo & part_info = state.loading_info->info; const String & part_name = state.loading_info->name; const DiskPtr & part_disk_ptr = state.loading_info->disk; - LOG_TRACE(log, "Loading {} unexpected part {} from disk {}", magic_enum::enum_name(to_state), part_name, part_disk_ptr->getName()); + LOG_TRACE(log, "Loading unexpected part {} from disk {}", part_name, part_disk_ptr->getName()); LoadPartResult res; auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); @@ -1332,12 +1332,10 @@ void MergeTreeData::loadUnexpectedDataPart(UnexpectedPartLoadState & state) .build(); state.part->loadRowsCountFileForUnexpectedPart(); - - state.is_empty = part->isEmpty(); } catch (...) { - LOG_DEBUG(log, "Failed to load data part {} with exception: {}", part_name, getExceptionMessage(std::current_exception(), false)); + LOG_DEBUG(log, "Failed to load unexcepted data part {} with exception: {}", part_name, getExceptionMessage(std::current_exception(), false)); if (!state.part) { /// Build a fake part and mark it as broken in case of filesystem error. @@ -1350,9 +1348,7 @@ void MergeTreeData::loadUnexpectedDataPart(UnexpectedPartLoadState & state) } state.is_broken = true; - tryLogCurrentException(log, fmt::format("while loading part {} on path {}", part_name, part_path)); - - state.is_empty = calculatePartSizeSafe(res.part, log.load()) == 0; + tryLogCurrentException(log, fmt::format("while loading unexcepted part {} on path {}", part_name, part_path)); } } @@ -1931,12 +1927,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional(info, name, disk), uncovered}); + unexpected_unloaded_data_parts.push_back({std::make_shared(info, name, disk), uncovered, false, nullptr}); } if (!unexpected_unloaded_data_parts.empty()) { - LOG_DEBUG(log, "Found {} unexpected data parts. They will be loaded asynchronously", unexpected_data_parts.size()); + LOG_DEBUG(log, "Found {} unexpected data parts. 
They will be loaded asynchronously", unexpected_unloaded_data_parts.size()); { std::lock_guard lock(unexpected_data_parts_mutex); unexpected_data_parts = std::move(unexpected_unloaded_data_parts); @@ -1977,16 +1973,15 @@ void MergeTreeData::loadUnexpectedDataParts() { { std::lock_guard lock(unexpected_data_parts_mutex); - if (unexpected_unloaded_data_parts.empty()) + if (unexpected_data_parts.empty()) { unexpected_data_parts_loading_finished = true; unexpected_data_parts_cv.notify_all(); return; } - LOG_DEBUG(log, "Loading {} unexpected data parts {}", - unexpected_unloaded_data_parts.size(), - is_async ? "asynchronously" : "synchronously"); + LOG_DEBUG(log, "Loading {} unexpected data parts", + unexpected_data_parts.size()); } ThreadFuzzer::maybeInjectSleep(); @@ -2017,7 +2012,7 @@ void MergeTreeData::loadUnexpectedDataParts() }, Priority{}); } runner.waitForAllToFinishAndRethrowFirstError(); - LOG_DEBUG(log, "Loaded {} unexpected data parts {}", num_loaded_parts); + LOG_DEBUG(log, "Loaded {} unexpected data parts", unexpected_data_parts.size()); { std::lock_guard lock(unexpected_data_parts_mutex); @@ -2202,8 +2197,8 @@ void MergeTreeData::startOutdatedAndUnexpectedDataPartsLoadingTask() { if (outdated_data_parts_loading_task) outdated_data_parts_loading_task->activateAndSchedule(); - if (unexpected_data_parts_load_task) - unexpected_data_parts_load_task->activateAndSchedule(); + if (unexpected_data_parts_loading_task) + unexpected_data_parts_loading_task->activateAndSchedule(); } void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask() @@ -2219,7 +2214,7 @@ void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask() outdated_data_parts_cv.notify_all(); } - if (unexpected_data_parts_load_task) + if (unexpected_data_parts_loading_task) { { std::lock_guard lock(unexpected_data_parts_mutex); @@ -4288,16 +4283,13 @@ void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true, &lock); } -void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered) +void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix) { if (prefix.empty()) LOG_INFO(log, "Renaming {} to {} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); else LOG_INFO(log, "Renaming {} to {}_{} and forgetting it.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name); - if (restore_covered) - waitForOutdatedPartsToBeLoaded(); - auto lock = lockParts(); bool removed_active_part = false; bool restored_active_part = false; @@ -4323,132 +4315,6 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT LOG_TEST(log, "forcefullyMovePartToDetachedAndRemoveFromMemory: removing {} from data_parts_indexes", part->getNameWithState()); data_parts_indexes.erase(it_part); - if (restore_covered && part->info.level == 0 && part->info.mutation == 0) - { - LOG_WARNING(log, "Will not recover parts covered by zero-level part {}", part->name); - return; - } - - /// Let's restore some parts covered by unexpected to avoid partial data - if (restore_covered) - { - Strings restored; - Strings error_parts; - - auto is_appropriate_state = [] (const DataPartPtr & part_) - { - /// In rare cases, we may have a chain of unexpected parts that cover common source parts, 
e.g. all_1_2_3, all_1_3_4 - /// It may happen as a result of interrupted cloneReplica - bool already_active = part_->getState() == DataPartState::Active; - if (!already_active && part_->getState() != DataPartState::Outdated) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to restore a part {} from unexpected state: {}", part_->name, part_->getState()); - return !already_active; - }; - - auto activate_part = [this, &restored_active_part](auto it) - { - /// It's not clear what to do if we try to activate part that was removed in transaction. - /// It may happen only in ReplicatedMergeTree, so let's simply throw LOGICAL_ERROR for now. - chassert((*it)->version.isRemovalTIDLocked()); - if ((*it)->version.removal_tid_lock == Tx::PrehistoricTID.getHash()) - (*it)->version.unlockRemovalTID(Tx::PrehistoricTID, TransactionInfoContext{getStorageID(), (*it)->name}); - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot activate part {} that was removed by transaction ({})", - (*it)->name, (*it)->version.removal_tid_lock); - - addPartContributionToColumnAndSecondaryIndexSizes(*it); - addPartContributionToDataVolume(*it); - modifyPartState(it, DataPartState::Active); /// iterator is not invalidated here - restored_active_part = true; - }; - - /// ActiveDataPartSet allows to restore most top-level parts instead of unexpected. - /// It can be important in case of assigned merges. If unexpected part is result of some - /// finished, but not committed merge then we should restore (at least try to restore) - /// closest ancestors for the unexpected part to be able to execute it. - /// However it's not guaranteed because outdated parts can intersect - ActiveDataPartSet parts_for_replacement(format_version); - auto range = getDataPartsPartitionRange(part->info.partition_id); - DataPartsVector parts_candidates(range.begin(), range.end()); - - /// In case of intersecting outdated parts we want to add bigger parts (with higher level) first - auto comparator = [] (const DataPartPtr left, const DataPartPtr right) -> bool - { - if (left->info.level < right->info.level) - return true; - else if (left->info.level > right->info.level) - return false; - else - return left->info.mutation < right->info.mutation; - }; - std::sort(parts_candidates.begin(), parts_candidates.end(), comparator); - /// From larger to smaller parts - for (const auto & part_candidate_in_partition : parts_candidates | std::views::reverse) - { - if (part->info.contains(part_candidate_in_partition->info) - && is_appropriate_state(part_candidate_in_partition)) - { - String out_reason; - /// Outdated parts can itersect legally (because of DROP_PART) here it's okay, we - /// are trying to do out best to restore covered parts. 
- auto outcome = parts_for_replacement.tryAddPart(part_candidate_in_partition->info, &out_reason); - if (outcome == ActiveDataPartSet::AddPartOutcome::HasIntersectingPart) - { - error_parts.push_back(part->name); - LOG_ERROR(log, "Failed to restore part {}, because of intersection reason '{}'", part->name, out_reason); - } - } - } - - if (parts_for_replacement.size() > 0) - { - std::vector> holes_list; - /// Most part of the code below is just to write pretty message - auto part_infos = parts_for_replacement.getPartInfos(); - int64_t current_right_block = part_infos[0].min_block; - for (const auto & top_level_part_to_replace : part_infos) - { - auto data_part_it = data_parts_by_info.find(top_level_part_to_replace); - if (data_part_it == data_parts_by_info.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find part {} in own set", top_level_part_to_replace.getPartNameForLogs()); - activate_part(data_part_it); - restored.push_back((*data_part_it)->name); - if (top_level_part_to_replace.min_block - current_right_block > 1) - holes_list.emplace_back(current_right_block, top_level_part_to_replace.min_block); - current_right_block = top_level_part_to_replace.max_block; - } - if (part->info.max_block != current_right_block) - holes_list.emplace_back(current_right_block, part->info.max_block); - - for (const String & name : restored) - LOG_INFO(log, "Activated part {} in place of unexpected {}", name, part->name); - - if (!error_parts.empty() || !holes_list.empty()) - { - std::string error_parts_message, holes_list_message; - if (!error_parts.empty()) - error_parts_message = fmt::format(" Parts failed to restore because of intersection: [{}]", fmt::join(error_parts, ", ")); - if (!holes_list.empty()) - { - if (!error_parts.empty()) - holes_list_message = "."; - - Strings holes_list_pairs; - for (const auto & [left_side, right_side] : holes_list) - holes_list_pairs.push_back(fmt::format("({}, {})", left_side + 1, right_side - 1)); - holes_list_message += fmt::format(" Block ranges failed to restore: [{}]", fmt::join(holes_list_pairs, ", ")); - } - LOG_WARNING(log, "The set of parts restored in place of {} looks incomplete. " - "SELECT queries may observe gaps in data until this replica is synchronized with other replicas.{}{}", - part->name, error_parts_message, holes_list_message); - } - } - else - { - LOG_INFO(log, "Don't find any parts for replacement instead of unexpected {}", part->name); - } - } - if (removed_active_part || restored_active_part) resetObjectColumnsFromActiveParts(lock); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index f7e84c5086f..2f9283659e3 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1560,12 +1560,11 @@ protected: /// if it is covered by any unexpected part bool uncovered = true; bool is_broken = false; - bool is_empty = false; MutableDataPartPtr part; }; BackgroundSchedulePool::TaskHolder unexpected_data_parts_loading_task; - std::vector unexpected_data_parts TSA_GUARDED_BY(unexpected_data_parts_mutex); + std::vector unexpected_data_parts; bool unexpected_data_parts_loading_canceled TSA_GUARDED_BY(unexpected_data_parts_mutex) = false; void loadUnexpectedDataParts(); @@ -1574,6 +1573,7 @@ protected: /// This has to be "true" by default, because in case of empty table or absence of Outdated parts /// it is automatically finished. 
std::atomic_bool outdated_data_parts_loading_finished = true; + std::atomic_bool unexpected_data_parts_loading_finished = true; void loadOutdatedDataParts(bool is_async); void startOutdatedAndUnexpectedDataPartsLoadingTask(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b24549df757..9144ef7c0f7 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -153,7 +153,7 @@ void StorageMergeTree::startup() { background_operations_assignee.start(); startBackgroundMovesIfNeeded(); - startOutdatedDataPartsLoadingTask(); + startOutdatedAndUnexpectedDataPartsLoadingTask(); } catch (...) { @@ -179,7 +179,7 @@ void StorageMergeTree::shutdown(bool) if (shutdown_called.exchange(true)) return; - stopOutdatedDataPartsLoadingTask(); + stopOutdatedAndUnexpectedDataPartsLoadingTask(); /// Unlock all waiting mutations { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d4074ecb3f3..2ad2cad0bd3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1576,8 +1576,6 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) * But actually we can't precisely determine that ALL missing parts * covered by this unexpected part. So missing parts will be downloaded. */ - waitForUnexpectedPartsToBeLoaded(); - /// Intersection of local parts and expected parts ActiveDataPartSet local_expected_parts_set(format_version); @@ -1596,13 +1594,15 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); + waitForUnexpectedPartsToBeLoaded(); + ActiveDataPartSet set_of_empty_unexpected_parts(format_version); for (const auto & load_state : unexpected_data_parts) { if (load_state.is_broken || load_state.part->rows_count || !load_state.uncovered) continue; - set_of_empty_unexpected_parts.add(part->name); + set_of_empty_unexpected_parts.add(load_state.part->name); } if (auto empty_count = set_of_empty_unexpected_parts.size()) LOG_WARNING(log, "Found {} empty unexpected parts (probably some dropped parts were not cleaned up before restart): [{}]", @@ -1623,31 +1623,33 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) for (const auto & load_state : unexpected_data_parts) { + if (load_state.is_broken) + continue; unexpected_parts_rows += load_state.part->rows_count; /// This part may be covered by some expected part that is active and present locally /// Probably we just did not remove this part from disk before restart (but removed from ZooKeeper) - String covering_local_part = local_expected_parts_set.getContainingPart(part->name); + String covering_local_part = local_expected_parts_set.getContainingPart(load_state.part->name); if (!covering_local_part.empty()) { - covered_unexpected_parts.push_back(part->name); + covered_unexpected_parts.push_back(load_state.part->name); continue; } - String covering_empty_part = set_of_empty_unexpected_parts.getContainingPart(part->name); + String covering_empty_part = set_of_empty_unexpected_parts.getContainingPart(load_state.part->name); if (!covering_empty_part.empty()) { LOG_INFO(log, "Unexpected part {} is covered by empty part {}, assuming it has been dropped just before restart", - part->name, covering_empty_part); - covered_unexpected_parts.push_back(part->name); + load_state.part->name, covering_empty_part); + 
covered_unexpected_parts.push_back(load_state.part->name); continue; } - auto covered_parts = local_expected_parts_set.getPartInfosCoveredBy(part->info); + auto covered_parts = local_expected_parts_set.getPartInfosCoveredBy(load_state.part->info); - if (MergeTreePartInfo::areAllBlockNumbersCovered(part->info, covered_parts)) + if (MergeTreePartInfo::areAllBlockNumbersCovered(load_state.part->info, covered_parts)) { - restorable_unexpected_parts.insert(part->name); + restorable_unexpected_parts.insert(load_state.part->name); continue; } @@ -1661,16 +1663,18 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) } /// Part is unexpected and we don't have covering part: it's suspicious - uncovered_unexpected_parts.insert(part->name); - uncovered_unexpected_parts_rows += part->rows_count; + uncovered_unexpected_parts.insert(load_state.part->name); + uncovered_unexpected_parts_rows += load_state.part->rows_count; - if (part->info.level > 0) + if (load_state.part->info.level > 0) { ++unexpected_parts_nonnew; - unexpected_parts_nonnew_rows += part->rows_count; + unexpected_parts_nonnew_rows += load_state.part->rows_count; } } + unexpected_data_parts.clear(); + const UInt64 parts_to_fetch_blocks = std::accumulate(parts_to_fetch.cbegin(), parts_to_fetch.cend(), 0, [&](UInt64 acc, const String & part_name) { @@ -1734,13 +1738,6 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) /// Add to the queue jobs to pick up the missing parts from other replicas and remove from ZK the information that we have them. queue.setBrokenPartsToEnqueueFetchesOnLoading(std::move(parts_to_fetch)); - /// Remove extra local parts. - /// for (const DataPartPtr & part : unexpected_parts) - /// { - /// bool restore_covered = restorable_unexpected_parts.contains(part->name) || uncovered_unexpected_parts.contains(part->name); - /// LOG_ERROR(log, "Renaming unexpected part {} to ignored_{}{}", part->name, part->name, restore_covered ? 
", restoring covered parts" : ""); - /// forcefullyMovePartToDetachedAndRemoveFromMemory(part, "ignored", restore_covered); - /// } return true; } diff --git a/tests/integration/test_max_suspicious_broken_parts_replicated/test.py b/tests/integration/test_max_suspicious_broken_parts_replicated/test.py index 0d009e6b132..683715da404 100644 --- a/tests/integration/test_max_suspicious_broken_parts_replicated/test.py +++ b/tests/integration/test_max_suspicious_broken_parts_replicated/test.py @@ -223,4 +223,4 @@ def test_corrupted_unexpected_part_ultimate(): == "1\n" ) - assert node.query("SELECT sum(key) FROM broken_table_3") == "190\n" + assert node.query("SELECT sum(key) FROM broken_table_3") == "145\n" From 56d1dc05105ab0ce777f048695e67e4534d06f81 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 8 May 2024 18:40:02 +0200 Subject: [PATCH 126/651] fix tests --- .../system-tables/server_settings.md | 29 ++++++++++--------- src/Storages/MergeTree/MergeTreeData.cpp | 4 +-- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md index 417c3460a53..01ffb76e3f2 100644 --- a/docs/en/operations/system-tables/server_settings.md +++ b/docs/en/operations/system-tables/server_settings.md @@ -32,20 +32,21 @@ WHERE name LIKE '%thread_pool%' ``` ``` text -┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─changeable_without_restart─┬─is_obsolete─┐ -│ max_thread_pool_size │ 10000 │ 10000 │ 0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ No │ 0 │ -│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ No │ 0 │ -│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ No │ 0 │ -│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ No │ 0 │ -│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ No │ 0 │ -│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ No │ 0 │ -│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ No │ 0 │ -│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ No │ 0 │ -│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ No │ 0 │ -│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ No │ 0 │ -│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ No │ 0 │ -│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. 
│ UInt64 │ No │ 0 │ -└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴────────────────────────────┴─────────────┘ +┌─name──────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─changeable_without_restart─┬─is_obsolete─┐ +│ max_thread_pool_size │ 10000 │ 10000 │ 0 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ No │ 0 │ +│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ No │ 0 │ +│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ No │ 0 │ +│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ No │ 0 │ +│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ No │ 0 │ +│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ No │ 0 │ +│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ No │ 0 │ +│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ No │ 0 │ +│ max_unexpected_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Unexpected ones) at startup. │ UInt64 │ No │ 0 │ +│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ No │ 0 │ +│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ No │ 0 │ +│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ No │ 0 │ +│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. 
│ UInt64 │ No │ 0 │ +└───────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴────────────────────────────┴─────────────┘ ``` diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b5e5a1b93fc..1a9352ccc4c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1861,7 +1861,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalrenameToDetached("ignore"); /// detached parts must not have '_' in prefixes + load_state.part->renameToDetached("ignored"); /// detached parts must not have '_' in prefixes } }, Priority{}); } From eedef6d826412b6b9f621527a0c4367142b03632 Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Tue, 7 May 2024 11:10:27 +0300 Subject: [PATCH 127/651] fix empty used_dictionaries in query_log --- .../ExternalDictionariesLoader.cpp | 4 +- ...3148_query_log_used_dictionaries.reference | 4 + .../03148_query_log_used_dictionaries.sql | 84 +++++++++++++++++++ 3 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03148_query_log_used_dictionaries.reference create mode 100644 tests/queries/0_stateless/03148_query_log_used_dictionaries.sql diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index f48ee61dab8..49891e6cd60 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -79,7 +79,7 @@ ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::getDictionary(co std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); return std::static_pointer_cast(load(resolved_dictionary_name)); } @@ -89,7 +89,7 @@ ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::tryGetDictionary std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); return std::static_pointer_cast(tryLoad(resolved_dictionary_name)); } diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference new file mode 100644 index 00000000000..1f54474efa3 --- /dev/null +++ b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference @@ -0,0 +1,4 @@ +simple_with_analyzer 1 +nested_with_analyzer 1 +simple_without_analyzer 1 +nested_without_analyzer 1 diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql new file mode 100644 index 00000000000..6f10b118c92 --- /dev/null +++ 
b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql @@ -0,0 +1,84 @@ +DROP DICTIONARY IF EXISTS 03148_dictionary; + +CREATE DICTIONARY 03148_dictionary ( + id UInt64, + name String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE( + QUERY 'select 0 as id, ''name0'' as name' +)) +LIFETIME(MIN 1 MAX 10) +LAYOUT(HASHED); + +SELECT + dictGet('03148_dictionary', 'name', number) as dict_value +FROM numbers(1) +SETTINGS + allow_experimental_analyzer = 1, + log_comment = 'simple_with_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'simple_with_analyzer', length(used_dictionaries) as used_dictionaries_qty +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'simple_with_analyzer'; + +SELECT * +FROM ( + SELECT + dictGet('03148_dictionary', 'name', number) as dict_value + FROM numbers(1) +) t +SETTINGS + allow_experimental_analyzer = 1, + log_comment = 'nested_with_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'nested_with_analyzer', length(used_dictionaries) as used_dictionaries_qty +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'nested_with_analyzer'; + +SELECT + dictGet('03148_dictionary', 'name', number) as dict_value +FROM numbers(1) +SETTINGS + allow_experimental_analyzer = 0, + log_comment = 'simple_without_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'simple_without_analyzer', length(used_dictionaries) as used_dictionaries_qty +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'simple_without_analyzer'; + +SELECT * +FROM ( + SELECT + dictGet('03148_dictionary', 'name', number) as dict_value + FROM numbers(1) +) t +SETTINGS + allow_experimental_analyzer = 0, + log_comment = 'nested_without_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'nested_without_analyzer', length(used_dictionaries) as used_dictionaries_qty +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'nested_without_analyzer'; + +DROP DICTIONARY IF EXISTS 03148_dictionary; From db8079768a95d8c7da1fc4715c8cee4c9fd6d128 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 9 May 2024 00:00:51 +0200 Subject: [PATCH 128/651] automatically recover Replicated database if the queue got stuck --- src/Databases/DatabaseReplicatedSettings.h | 1 + src/Databases/DatabaseReplicatedWorker.cpp | 22 ++++++++ src/Interpreters/DDLWorker.cpp | 27 ++++++++- src/Interpreters/DDLWorker.h | 3 + .../configs/config.xml | 1 + .../configs/inconsistent_settings.xml | 18 ++++++ .../configs/settings2.xml | 17 ++++++ .../test_replicated_database/test.py | 55 ++++++++++++++++++- 8 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 tests/integration/test_replicated_database/configs/inconsistent_settings.xml create mode 100644 tests/integration/test_replicated_database/configs/settings2.xml diff --git a/src/Databases/DatabaseReplicatedSettings.h b/src/Databases/DatabaseReplicatedSettings.h index 66c1a860b73..eb6dca7d5c4 100644 --- a/src/Databases/DatabaseReplicatedSettings.h +++ b/src/Databases/DatabaseReplicatedSettings.h @@ -13,6 +13,7 @@ class ASTStorage; M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \ M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \ 
M(Bool, check_consistency, true, "Check consistency of local metadata and metadata in Keeper, do replica recovery on inconsistency", 0) \ + M(UInt64, max_retries_before_automatic_recovery, 100, "Max number of attempts to execute a queue entry before marking replica as lost and recovering it from snapshot (0 means infinite)", 0) \ DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS) diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 51065062995..6e19a77c501 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -18,6 +18,8 @@ namespace ErrorCodes extern const int UNFINISHED; } +static constexpr const char * FORCE_AUTO_RECOVERY_DIGEST = "42"; + DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db, ContextPtr context_) : DDLWorker(/* pool_size */ 1, db->zookeeper_path + "/log", context_, nullptr, {}, fmt::format("DDLWorker({})", db->getDatabaseName())) , database(db) @@ -44,6 +46,26 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread() /// NOTE It will not stop cleanup thread until DDLWorker::shutdown() call (cleanup thread will just do nothing) break; } + + if (database->db_settings.max_retries_before_automatic_recovery && + database->db_settings.max_retries_before_automatic_recovery <= subsequent_errors_count) + { + String current_task_name; + { + std::unique_lock lock{mutex}; + current_task_name = current_task; + } + LOG_WARNING(log, "Database got stuck at processing task {}: it failed {} times in a row with the same error. " + "Will reset digest to mark our replica as lost, and trigger recovery from the most up-to-date metadata " + "from ZooKeeper. See max_retries_before_automatic_recovery setting. The error: {}", + current_task_name, subsequent_errors_count, last_unexpected_error); + + String digest_str; + zookeeper->tryGet(database->replica_path + "/digest", digest_str); + LOG_WARNING(log, "Resetting digest from {} to {}", digest_str, FORCE_AUTO_RECOVERY_DIGEST); + zookeeper->trySet(database->replica_path + "/digest", FORCE_AUTO_RECOVERY_DIGEST); + } + initializeReplication(); initialized = true; return true; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index fd807d54eff..d7434524b13 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -676,7 +676,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) if (task.execution_status.code != 0) { bool status_written_by_table_or_db = task.ops.empty(); - if (status_written_by_table_or_db) + bool is_replicated_database_task = dynamic_cast<DatabaseReplicatedTask *>(&task); + if (status_written_by_table_or_db || is_replicated_database_task) { throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.message); } @@ -710,6 +711,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) task.createSyncedNodeIfNeed(zookeeper); updateMaxDDLEntryID(task.entry_name); task.completely_processed = true; + subsequent_errors_count = 0; } @@ -1145,12 +1147,14 @@ void DDLWorker::runMainThread() cleanup_event->set(); scheduleTasks(reinitialized); + subsequent_errors_count = 0; LOG_DEBUG(log, "Waiting for queue updates"); queue_updated_event->wait(); } catch (const Coordination::Exception & e) { + subsequent_errors_count = 0; if (Coordination::isHardwareError(e.code)) { initialized = false; @@ -1168,7 +1172,26 @@ void DDLWorker::runMainThread() } catch (...)
{ - tryLogCurrentException(log, "Unexpected error, will try to restart main thread"); + String message = getCurrentExceptionMessage(/*with_stacktrace*/ true); + if (subsequent_errors_count) + { + if (last_unexpected_error == message) + { + ++subsequent_errors_count; + } + else + { + subsequent_errors_count = 1; + last_unexpected_error = message; + } + } + else + { + subsequent_errors_count = 1; + last_unexpected_error = message; + } + + LOG_ERROR(log, "Unexpected error ({} times in a row), will try to restart main thread: {}", subsequent_errors_count, message); reset_state(); sleepForSeconds(5); } diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index 9eb6606e73c..6d1dabda54f 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -194,6 +194,9 @@ protected: ConcurrentSet entries_to_skip; + std::atomic_uint64_t subsequent_errors_count = 0; + String last_unexpected_error; + const CurrentMetrics::Metric * max_entry_metric; const CurrentMetrics::Metric * max_pushed_entry_metric; }; diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml index 7d779cb0d2e..5150e9096de 100644 --- a/tests/integration/test_replicated_database/configs/config.xml +++ b/tests/integration/test_replicated_database/configs/config.xml @@ -5,4 +5,5 @@ 10 50 + 42 diff --git a/tests/integration/test_replicated_database/configs/inconsistent_settings.xml b/tests/integration/test_replicated_database/configs/inconsistent_settings.xml new file mode 100644 index 00000000000..13dc1eae976 --- /dev/null +++ b/tests/integration/test_replicated_database/configs/inconsistent_settings.xml @@ -0,0 +1,18 @@ + + + + 1 + 1 + 0 + 0 + + 0 + 1 + + + + + default + + + diff --git a/tests/integration/test_replicated_database/configs/settings2.xml b/tests/integration/test_replicated_database/configs/settings2.xml new file mode 100644 index 00000000000..dad5740a8ae --- /dev/null +++ b/tests/integration/test_replicated_database/configs/settings2.xml @@ -0,0 +1,17 @@ + + + + 1 + 1 + 0 + 0 + + 0 + + + + + default + + + diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 706620cfaef..801aac90292 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -24,7 +24,7 @@ main_node = cluster.add_instance( dummy_node = cluster.add_instance( "dummy_node", main_configs=["configs/config.xml"], - user_configs=["configs/settings.xml"], + user_configs=["configs/settings2.xml"], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 2}, @@ -47,8 +47,9 @@ snapshotting_node = cluster.add_instance( snapshot_recovering_node = cluster.add_instance( "snapshot_recovering_node", main_configs=["configs/config.xml"], - user_configs=["configs/settings.xml"], + user_configs=["configs/inconsistent_settings.xml"], with_zookeeper=True, + macros={"shard": 1, "replica": 4}, ) all_nodes = [ @@ -1443,3 +1444,53 @@ def test_table_metadata_corruption(started_cluster): main_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") dummy_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") + + +def test_auto_recovery(started_cluster): + dummy_node.query("DROP DATABASE IF EXISTS auto_recovery") + snapshot_recovering_node.query("DROP DATABASE IF EXISTS auto_recovery") + + dummy_node.query( + "CREATE DATABASE auto_recovery ENGINE = Replicated('/clickhouse/databases/auto_recovery', 'shard1', 
'replica1');" + ) + snapshot_recovering_node.query( + "CREATE DATABASE auto_recovery ENGINE = Replicated('/clickhouse/databases/auto_recovery', 'shard1', 'replica2') SETTINGS max_retries_before_automatic_recovery=3;" + ) + + dummy_node.query( + "CREATE TABLE auto_recovery.t1 (n int) ENGINE=ReplicatedMergeTree ORDER BY n" + ) + dummy_node.query("INSERT INTO auto_recovery.t1 SELECT 42") + # dummy_node has 0 (default is 1), + # so it will consider that the setting is changed, and will write it to the DDL entry + # snapshot_recovering_node has implicit_transaction=1, so it will fail and recover from snapshot + dummy_node.query( + "CREATE TABLE auto_recovery.t2 (n int) ENGINE=ReplicatedMergeTree ORDER BY tuple()", + settings={ + "throw_on_unsupported_query_inside_transaction": 1, + "distributed_ddl_task_timeout": 0, + }, + ) + dummy_node.query("INSERT INTO auto_recovery.t2 SELECT 137") + dummy_node.query( + "EXCHANGE TABLES auto_recovery.t1 AND auto_recovery.t2", + settings={"distributed_ddl_task_timeout": 0}, + ) + + snapshot_recovering_node.query( + "SYSTEM SYNC DATABASE REPLICA auto_recovery", settings={"receive_timeout": 60} + ) + assert snapshot_recovering_node.contains_in_log( + "Unexpected error (3 times in a row), will try to restart main thread" + ) + assert snapshot_recovering_node.contains_in_log( + "Cannot begin an implicit transaction" + ) + snapshot_recovering_node.query("SYSTEM SYNC REPLICA auto_recovery.t1") + snapshot_recovering_node.query("SYSTEM SYNC REPLICA auto_recovery.t2") + + assert "42\n" == dummy_node.query("SELECT * FROM auto_recovery.t2") + assert "137\n" == dummy_node.query("SELECT * FROM auto_recovery.t1") + + assert "42\n" == snapshot_recovering_node.query("SELECT * FROM auto_recovery.t2") + assert "137\n" == snapshot_recovering_node.query("SELECT * FROM auto_recovery.t1") From a365c36e9d4202d1f1a7802268b27ac5b35673b9 Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Thu, 9 May 2024 11:06:31 +0300 Subject: [PATCH 129/651] use qualified dictionary name in query log --- docs/en/operations/system-tables/query_log.md | 2 +- src/Dictionaries/IDictionary.h | 9 +++++++++ src/Interpreters/ExternalDictionariesLoader.cpp | 12 +++++++----- .../03148_query_log_used_dictionaries.reference | 8 ++++---- .../03148_query_log_used_dictionaries.sql | 8 ++++---- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index d48eb31df00..75b855966a3 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -108,7 +108,7 @@ Columns: - `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution. - `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution. - `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families`, which were used during query execution. -- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. +- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. 
For dictionaries configured using an XML file this is the name of the dictionary, and for dictionaries created by an SQL statement, the canonical name is the fully qualified object name. - `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats`, which were used during query execution. - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index bab80d3cd57..944e00f14c9 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -69,6 +69,15 @@ public: return dictionary_id.getNameForLogs(); } + /// Returns fully qualified unquoted dictionary name + std::string getQualifiedName() const + { + std::lock_guard lock{mutex}; + if (dictionary_id.database_name.empty()) + return dictionary_id.table_name; + return dictionary_id.database_name + "." + dictionary_id.table_name; + } + StorageID getDictionaryID() const { std::lock_guard lock{mutex}; diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 49891e6cd60..1685c06d387 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -77,21 +77,23 @@ void ExternalDictionariesLoader::updateObjectFromConfigWithoutReloading(IExterna ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::getDictionary(const std::string & dictionary_name, ContextPtr local_context) const { std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); + auto dictionary = std::static_pointer_cast(load(resolved_dictionary_name)); if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, dictionary->getQualifiedName()); - return std::static_pointer_cast(load(resolved_dictionary_name)); + return dictionary; } ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::tryGetDictionary(const std::string & dictionary_name, ContextPtr local_context) const { std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); + auto dictionary = std::static_pointer_cast(tryLoad(resolved_dictionary_name)); - if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries && dictionary) + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, dictionary->getQualifiedName()); - return std::static_pointer_cast(tryLoad(resolved_dictionary_name)); + return dictionary; } diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference index 1f54474efa3..4fa3a14e63f 100644 --- a/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference +++ 
b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference @@ -1,4 +1,4 @@ -simple_with_analyzer 1 -nested_with_analyzer 1 -simple_without_analyzer 1 -nested_without_analyzer 1 +simple_with_analyzer ['default.03148_dictionary'] +nested_with_analyzer ['default.03148_dictionary'] +simple_without_analyzer ['default.03148_dictionary'] +nested_without_analyzer ['default.03148_dictionary'] diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql index 6f10b118c92..1b647a7ee62 100644 --- a/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql +++ b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql @@ -21,7 +21,7 @@ FORMAT Null; SYSTEM FLUSH LOGS; -SELECT 'simple_with_analyzer', length(used_dictionaries) as used_dictionaries_qty +SELECT log_comment, used_dictionaries FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' @@ -40,7 +40,7 @@ FORMAT Null; SYSTEM FLUSH LOGS; -SELECT 'nested_with_analyzer', length(used_dictionaries) as used_dictionaries_qty +SELECT log_comment, used_dictionaries FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' @@ -56,7 +56,7 @@ FORMAT Null; SYSTEM FLUSH LOGS; -SELECT 'simple_without_analyzer', length(used_dictionaries) as used_dictionaries_qty +SELECT log_comment, used_dictionaries FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' @@ -75,7 +75,7 @@ FORMAT Null; SYSTEM FLUSH LOGS; -SELECT 'nested_without_analyzer', length(used_dictionaries) as used_dictionaries_qty +SELECT log_comment, used_dictionaries FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' From 412805c99e0e789d7bc13dcb73fdf8199758ad2a Mon Sep 17 00:00:00 2001 From: Danila Puzov Date: Thu, 9 May 2024 19:38:19 +0300 Subject: [PATCH 130/651] Add serial, generateSnowflakeID, generateUUIDv7 functions --- src/Functions/generateSnowflakeID.cpp | 92 ++++++++++++++ src/Functions/generateUUIDv7.cpp | 113 +++++++++++++++++ src/Functions/serial.cpp | 171 ++++++++++++++++++++++++++ 3 files changed, 376 insertions(+) create mode 100644 src/Functions/generateSnowflakeID.cpp create mode 100644 src/Functions/generateUUIDv7.cpp create mode 100644 src/Functions/serial.cpp diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp new file mode 100644 index 00000000000..e54b720ec98 --- /dev/null +++ b/src/Functions/generateSnowflakeID.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +class FunctionSnowflakeID : public IFunction +{ +private: + mutable std::atomic machine_sequence_number{0}; + mutable std::atomic last_timestamp{0}; + +public: + static constexpr auto name = "generateSnowflakeID"; + + static FunctionPtr create(ContextPtr /*context*/) + { + return std::make_shared(); + } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } + + bool isStateful() const override { return true; } + bool isDeterministic() const 
override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() > 1) { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", + getName(), arguments.size()); + } + + return std::make_shared<DataTypeInt64>(); + } + + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res = ColumnVector<Int64>::create(); + typename ColumnVector<Int64>::Container & vec_to = col_res->getData(); + size_t size = input_rows_count; + vec_to.resize(size); + + auto serverUUID = ServerUUID::get(); + + // mix the two 64-bit halves of the server UUID into a 64-bit machine id (only 10 of its bits are used below) + Int64 h = UUIDHelpers::getHighBytes(serverUUID); + Int64 l = UUIDHelpers::getLowBytes(serverUUID); + Int64 machine_id = (h * 11) ^ (l * 17); + + for (Int64 & x : vec_to) { + const auto tm_point = std::chrono::system_clock::now(); + Int64 current_timestamp = std::chrono::duration_cast<std::chrono::milliseconds>( + tm_point.time_since_epoch()).count(); + + Int64 local_machine_sequence_number = 0; + + if (current_timestamp != last_timestamp.load()) { + machine_sequence_number.store(0); + last_timestamp.store(current_timestamp); + } else { + local_machine_sequence_number = machine_sequence_number.fetch_add(1) + 1; + } + + x = (current_timestamp << 22) | (machine_id & 0x3ff000ull) | (local_machine_sequence_number & 0xfffull); + } + + return col_res; + } + +}; + +REGISTER_FUNCTION(GenerateSnowflakeID) +{ + factory.registerFunction<FunctionSnowflakeID>(); +} + +} diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp new file mode 100644 index 00000000000..61d742d2fda --- /dev/null +++ b/src/Functions/generateUUIDv7.cpp @@ -0,0 +1,113 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +#define DECLARE_SEVERAL_IMPLEMENTATIONS(...) \ +DECLARE_DEFAULT_CODE (__VA_ARGS__) \ +DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__) + +DECLARE_SEVERAL_IMPLEMENTATIONS( + +class FunctionGenerateUUIDv7 : public IFunction +{ +public: + static constexpr auto name = "generateUUIDv7"; + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 0; } + + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() > 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", + getName(), arguments.size()); + + return std::make_shared<DataTypeUUID>(); + } + + bool isDeterministic() const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res = ColumnVector<UUID>::create(); + typename ColumnVector<UUID>::Container & vec_to = col_res->getData(); + + size_t size = input_rows_count; + vec_to.resize(size); + + /// RandImpl is target-dependent and is not the same in different TargetSpecific namespaces.
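+ /// First fill the buffer with random bytes; the version, variant and timestamp bits are then overwritten in place below.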
+ RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UUID)); + + for (UUID & uuid : vec_to) + { + /// https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#section-5.2 + + const auto tm_point = std::chrono::system_clock::now(); + UInt64 unix_ts_ms = std::chrono::duration_cast( + tm_point.time_since_epoch()).count(); + + UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & 0x0000000000000fffull) | 0x0000000000007000ull | (unix_ts_ms << 16); + UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & 0x3fffffffffffffffull) | 0x8000000000000000ull; + } + + return col_res; + } +}; + +) // DECLARE_SEVERAL_IMPLEMENTATIONS +#undef DECLARE_SEVERAL_IMPLEMENTATIONS + +class FunctionGenerateUUIDv7 : public TargetSpecific::Default::FunctionGenerateUUIDv7 +{ +public: + explicit FunctionGenerateUUIDv7(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + + #if USE_MULTITARGET_CODE + selector.registerImplementation(); + #endif + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return selector.selectAndExecute(arguments, result_type, input_rows_count); + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + +REGISTER_FUNCTION(GenerateUUIDv7) +{ + factory.registerFunction(); +} + +} + + diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp new file mode 100644 index 00000000000..4f336013ca8 --- /dev/null +++ b/src/Functions/serial.cpp @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "Common/Logger.h" +#include + +namespace DB { + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionSerial : public IFunction +{ +private: + mutable zkutil::ZooKeeperPtr zk{nullptr}; + ContextPtr context; + +public: + static constexpr auto name = "serial"; + + explicit FunctionSerial(ContextPtr ctx) : context(ctx) + { + if (ctx->hasZooKeeper()) { + zk = ctx->getZooKeeper(); + } + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(std::move(context)); + } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + + bool isStateful() const override { return true; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool isSuitableForConstantFolding() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool canBeExecutedOnDefaultArguments() const override { return false; } + bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; } + bool hasInformationAboutMonotonicity() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1.", + getName(), arguments.size()); + if 
(!isStringOrFixedString(arguments[0])) { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Type of argument for function {} doesn't match: passed {}, should be string", + getName(), arguments[0]->getName()); + } + + return std::make_shared<DataTypeInt64>(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res = ColumnVector<Int64>::create(); + typename ColumnVector<Int64>::Container & vec_to = col_res->getData(); + size_t size = input_rows_count; + LOG_INFO(getLogger("Serial Function"), "Size = {}", size); + vec_to.resize(size); + + const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); + + Int64 counter; + + if (zk != nullptr) { + // Get Lock in ZooKeeper + // https://zookeeper.apache.org/doc/r3.2.2/recipes.html + + // 1. + if (zk->expired()) { + zk = context->getZooKeeper(); + } + + // create the serial node if this serial name is used for the first time + zk->createAncestors(serial_path); + zk->createIfNotExists(serial_path, ""); + + std::string lock_path = serial_path + "/lock-"; + std::string path_created = zk->create(lock_path, "", zkutil::CreateMode::EphemeralSequential); + Int64 created_sequence_number = std::stoll(path_created.substr(lock_path.size(), path_created.size() - lock_path.size())); + + while (true) { + // 2. + zkutil::Strings children = zk->getChildren(serial_path); + + // 3. + Int64 lowest_child_sequence_number = -1; + for (auto& child : children) { + if (child == "counter") { + continue; + } + std::string child_suffix = child.substr(5, 10); + Int64 seq_number = std::stoll(child_suffix); + + if (lowest_child_sequence_number == -1 || seq_number < lowest_child_sequence_number) { + lowest_child_sequence_number = seq_number; + } + } + + if (lowest_child_sequence_number == created_sequence_number) { + // we have the lock in ZooKeeper, now we can read and update the counter value + break; + } + + // 4. and 5. + Int64 prev_seq_number = created_sequence_number - 1; + std::string to_wait_key = std::to_string(prev_seq_number); + while (to_wait_key.size() != 10) { + to_wait_key = "0" + to_wait_key; + } + + zk->waitForDisappear(lock_path + to_wait_key); + } + + // Now we have a lock + // Update counter in ZooKeeper + std::string counter_path = serial_path + "/counter"; + if (zk->exists(counter_path)) { + std::string counter_string = zk->get(counter_path, nullptr); + counter = std::stoll(counter_string); + + LOG_INFO(getLogger("Serial Function"), "Got counter from Zookeeper = {}", counter); + } else { + counter = 1; + } + zk->createOrUpdate(counter_path, std::to_string(counter + input_rows_count), zkutil::CreateMode::Persistent); + + // Unlock = delete node created on step 1. + zk->deleteEphemeralNodeIfContentMatches(path_created, ""); + } else { + // ZooKeeper is not available + // What to do?
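+ // For now we fall back to a local counter starting from 1, which gives no uniqueness guarantee across servers or queries.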
+ + counter = 1; + } + + // Make a result + for (auto& val : vec_to) { + val = counter; + ++counter; + } + + + return col_res; + } + +}; + +REGISTER_FUNCTION(Serial) +{ + factory.registerFunction(); +} + +} From b97bf456c6e89f36d5225c9befda3738f54cdf31 Mon Sep 17 00:00:00 2001 From: tomershafir Date: Wed, 8 May 2024 18:51:25 +0300 Subject: [PATCH 131/651] try fix build --- programs/keeper/CMakeLists.txt | 1 + src/Coordination/Standalone/Context.cpp | 2 +- src/Coordination/Standalone/Context.h | 2 +- src/Disks/IO/createReadBufferFromFileBase.cpp | 4 ++-- src/Disks/IO/getIOUringReader.cpp | 9 +++++---- src/Disks/IO/getIOUringReader.h | 1 + src/Interpreters/Context.cpp | 4 ++-- src/Interpreters/Context.h | 2 +- src/Storages/StorageFile.cpp | 2 +- 9 files changed, 15 insertions(+), 12 deletions(-) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index b811868333b..af360e44ff4 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -148,6 +148,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 84e54ed7100..ec5400b7384 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -304,7 +304,7 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co } #if USE_LIBURING -IOUringReader & Context::getIOURingReader() const +IOUringReader & Context::getIOUringReader() const { callOnce(shared->io_uring_reader_initialized, [&] { shared->io_uring_reader = createIOUringReader(); diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index adb9111185f..29a66a0e3c7 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -137,7 +137,7 @@ public: IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; #if USE_LIBURING - IOUringReader & getIOURingReader() const; + IOUringReader & getIOUringReader() const; #endif std::shared_ptr getAsyncReadCounters() const; ThreadPool & getThreadPoolWriter() const; diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index f3bb6ae1740..b132e25ac6b 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -100,7 +100,7 @@ std::unique_ptr createReadBufferFromFileBase( else if (settings.local_fs_method == LocalFSReadMethod::io_uring) { #if USE_LIBURING - auto & reader = getIOURingReaderOrThrow(); + auto & reader = getIOUringReaderOrThrow(); res = std::make_unique( reader, settings.priority, diff --git a/src/Disks/IO/getIOUringReader.cpp b/src/Disks/IO/getIOUringReader.cpp index 8e9a9655a41..d9cc6211164 100644 --- a/src/Disks/IO/getIOUringReader.cpp +++ b/src/Disks/IO/getIOUringReader.cpp @@ -1,7 +1,8 @@ -#include "getIOUringReader.h" +#include #if 
USE_LIBURING +#include #include namespace DB @@ -20,8 +21,8 @@ std::unique_ptr createIOUringReader() IOUringReader & getIOUringReaderOrThrow(ContextPtr context) { - auto reader = context->getIOUringReader(); - if (!reader.isSupported) + auto & reader = context->getIOUringReader(); + if (!reader.isSupported()) { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); } @@ -33,7 +34,7 @@ IOUringReader & getIOUringReaderOrThrow() auto context = Context::getGlobalContextInstance(); if (!context) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); - return getIOUringReaderOrThrow(context) + return getIOUringReaderOrThrow(context); } } diff --git a/src/Disks/IO/getIOUringReader.h b/src/Disks/IO/getIOUringReader.h index ca619785ab4..59e71980750 100644 --- a/src/Disks/IO/getIOUringReader.h +++ b/src/Disks/IO/getIOUringReader.h @@ -6,6 +6,7 @@ #include #include +#include namespace DB { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d847cab013c..db374bc85f9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5176,10 +5176,10 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co } #if USE_LIBURING -IOUringReader & Context::getIOURingReader() const +IOUringReader & Context::getIOUringReader() const { callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = createIOUringReader() + shared->io_uring_reader = createIOUringReader(); }); return *shared->io_uring_reader; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d1ff5b4c2b2..0430db10de2 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1243,7 +1243,7 @@ public: IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; #if USE_LIBURING - IOUringReader & getIOURingReader() const; + IOUringReader & getIOUringReader() const; #endif std::shared_ptr getAsyncReadCounters() const; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9bead6d0267..1493e649b60 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -283,7 +283,7 @@ std::unique_ptr selectReadBuffer( else if (read_method == LocalFSReadMethod::io_uring && !use_table_fd) { #if USE_LIBURING - auto & reader = getIOURingReaderOrThrow(context); + auto & reader = getIOUringReaderOrThrow(context); res = std::make_unique( reader, Priority{}, From 5f312eb3e3fea61bed2ed85a4d16f17ec9c565b6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 10 May 2024 09:53:03 +0200 Subject: [PATCH 132/651] Correctly handle max_block_size=0 in Numbers source --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 16 ++++++++++++---- .../03149_numbers_max_block_size_zero.reference | 2 ++ .../03149_numbers_max_block_size_zero.sql | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference create mode 100644 tests/queries/0_stateless/03149_numbers_max_block_size_zero.sql diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 11371578c79..759dc7354df 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -176,8 +176,9 @@ protected: { std::lock_guard lock(ranges_state->mutex); - UInt64 need = base_block_size_; + bool without_block_size_limit = need == 0; + UInt64 size = 0; /// how many items found.
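+ /// need == 0 corresponds to max_block_size = 0: no per-chunk limit, so the loop below consumes all remaining ranges in one go.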
/// find start @@ -185,14 +186,21 @@ protected: end = start; /// find end - while (need != 0) + while (without_block_size_limit || need != 0) { UInt128 can_provide = end.offset_in_ranges == ranges.size() ? static_cast(0) : ranges[end.offset_in_ranges].size - end.offset_in_range; + if (can_provide == 0) break; - if (can_provide > need) + if (without_block_size_limit) + { + end.offset_in_ranges++; + end.offset_in_range = 0; + size += static_cast(can_provide); + } + else if (can_provide > need) { end.offset_in_range += need; size += need; @@ -527,7 +535,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() checkLimits(size_t(total_size)); - if (total_size / max_block_size < num_streams) + if (max_block_size != 0 && total_size / max_block_size < num_streams) num_streams = static_cast(total_size / max_block_size); if (num_streams == 0) diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference new file mode 100644 index 00000000000..896f02d1185 --- /dev/null +++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference @@ -0,0 +1,2 @@ +1320 +1320 diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sql b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sql new file mode 100644 index 00000000000..afc4e4d57a5 --- /dev/null +++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sql @@ -0,0 +1,2 @@ +SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0; +SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 1; From a31ee9891f610a14513c622bc81dcb25eaf25eb5 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 10 May 2024 10:36:59 +0200 Subject: [PATCH 133/651] Move setting to 24.5 version in SettingsChangesHistory --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e8cf1e98d27..3c1249d29e5 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,13 +87,13 @@ static std::map sett { {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, From fbf8dcb7feb480175f76f7fa9252cf80f3ca3cc4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 10 May 2024 11:55:24 +0200 Subject: [PATCH 134/651] Apply suggestions from code review Co-authored-by: Antonio Andelic --- src/Columns/ColumnDynamic.cpp | 7 +++---- src/Columns/ColumnVariant.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index a1dd60f4748..629df476591 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -48,8 +48,8 @@ ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, variant_info.variant_name_to_discriminator.reserve(variants.size()); for (ColumnVariant::Discriminator discr = 0; discr != variants.size(); ++discr) { - variant_info.variant_names.push_back(variants[discr]->getName()); - variant_info.variant_name_to_discriminator[variant_info.variant_names.back()] = discr; + const auto & variant_name = variant_info.variant_names.emplace_back(variants[discr]->getName()); + variant_info.variant_name_to_discriminator[variant_name] = discr; } return create(std::move(variant_column), variant_info, max_dynamic_types_, statistics_); @@ -133,8 +133,7 @@ void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePt for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) { - String name = new_variants[discr]->getName(); - new_variant_names.push_back(name); + const auto & name = new_variant_names.emplace_back(new_variants[discr]->getName()); new_variant_name_to_discriminator[name] = discr; auto current_it = variant_info.variant_name_to_discriminator.find(name); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 8f703ea17d9..e5a4498f340 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -189,7 +189,7 @@ public: void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping); void insertManyFrom(const IColumn & src_, size_t position, size_t length, const std::vector & 
global_discriminators_mapping); - /// Methods for insertrion into a specific variant. + /// Methods for insertion into a specific variant. void insertIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t n); void insertRangeIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t start, size_t length); void insertManyIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t position, size_t length); From e7c7eb159a44beb52cd3c7f2634fd8f13214ad71 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 11:32:27 +0000 Subject: [PATCH 135/651] Apply suggestions from the code review --- src/Columns/ColumnDynamic.cpp | 41 ++++--------------- src/Columns/tests/gtest_column_dynamic.cpp | 26 ++++++------ src/DataTypes/DataTypeDynamic.h | 5 +-- .../Serializations/SerializationDynamic.cpp | 7 +++- src/Functions/FunctionsConversion.cpp | 9 ++-- src/Interpreters/InterpreterInsertQuery.cpp | 6 ++- .../Algorithms/CollapsingSortedAlgorithm.cpp | 8 +++- 7 files changed, 44 insertions(+), 58 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 629df476591..76f536a3409 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -80,41 +80,14 @@ bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) DataTypes all_variants = current_variants; all_variants.push_back(new_variant); auto new_variant_type = std::make_shared(all_variants); - const auto & new_variants = assert_cast(*new_variant_type).getVariants(); - - std::vector current_to_new_discriminators; - current_to_new_discriminators.resize(variant_info.variant_names.size()); - Names new_variant_names; - new_variant_names.reserve(new_variants.size()); - std::unordered_map new_variant_name_to_discriminator; - new_variant_name_to_discriminator.reserve(new_variants.size()); - std::vector> new_variant_columns_and_discriminators_to_add; - new_variant_columns_and_discriminators_to_add.reserve(new_variants.size() - current_variants.size()); - - for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) - { - String name = new_variants[discr]->getName(); - new_variant_names.push_back(name); - new_variant_name_to_discriminator[name] = discr; - auto it = variant_info.variant_name_to_discriminator.find(name); - if (it == variant_info.variant_name_to_discriminator.end()) - new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), discr); - else - current_to_new_discriminators[it->second] = discr; - } - - variant_info.variant_type = new_variant_type; - variant_info.variant_name = new_variant_type->getName(); - variant_info.variant_names = new_variant_names; - variant_info.variant_name_to_discriminator = new_variant_name_to_discriminator; - assert_cast(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); - variant_mappings_cache.clear(); + updateVariantInfoAndExpandVariantColumn(new_variant_type); return true; } void ColumnDynamic::addStringVariant() { - addNewVariant(std::make_shared()); + if (!addNewVariant(std::make_shared())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add String variant to Dynamic column, it's a bug"); } void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePtr & new_variant_type) @@ -704,13 +677,13 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source result_variants.reserve(max_dynamic_types); /// Add String variant in advance. 
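/// (String is kept unconditionally because it is the fallback representation: once
/// max_dynamic_types is exhausted, values of any further type are inserted in their
/// string form. addStringVariant() above treats failure to add String as a
/// LOGICAL_ERROR, and the *Overflow tests below expect "42.42" rather than Float64.)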
result_variants.push_back(std::make_shared()); - size_t i = 0; - while (result_variants.size() != max_dynamic_types && i < variants_with_sizes.size()) + for (const auto & [_, variant] : variants_with_sizes) { - const auto & variant = variants_with_sizes[i].second; + if (result_variants.size() == max_dynamic_types) + break; + if (variant->getName() != "String") result_variants.push_back(variant); - ++i; } result_variant_type = std::make_shared(result_variants); diff --git a/src/Columns/tests/gtest_column_dynamic.cpp b/src/Columns/tests/gtest_column_dynamic.cpp index 4c209f7d8a9..a2862b09de1 100644 --- a/src/Columns/tests/gtest_column_dynamic.cpp +++ b/src/Columns/tests/gtest_column_dynamic.cpp @@ -195,7 +195,7 @@ TEST(ColumnDynamic, InsertFromOverflow1) column_to->insertFrom(*column_from, 1); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, "42.42"); @@ -220,7 +220,7 @@ TEST(ColumnDynamic, InsertFromOverflow2) ASSERT_EQ(field, 42); column_to->insertFrom(*column_from, 1); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, "42.42"); @@ -299,7 +299,7 @@ TEST(ColumnDynamic, InsertManyFromOverflow1) column_to->insertManyFrom(*column_from, 1, 2); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, "42.42"); @@ -332,7 +332,7 @@ TEST(ColumnDynamic, InsertManyFromOverflow2) column_to->insertManyFrom(*column_from, 1, 2); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, "42.42"); @@ -406,7 +406,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow1) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 4]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 3]; @@ -429,7 +429,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow2) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); 
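// (255 is the hard ceiling on distinct variants: ColumnVariant discriminators are a
// single byte and one value is reserved for NULL. This is inferred from the
// discriminator type, not stated in the patch itself.)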
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 2]; @@ -451,7 +451,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow3) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 2]; @@ -470,9 +470,9 @@ TEST(ColumnDynamic, InsertRangeFromOverflow4) auto column_to = getDynamicWithManyVariants(254); column_to->insertRangeFrom(*column_from, 0, 3); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field("42")); field = (*column_to)[column_to->size() - 2]; @@ -495,7 +495,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow5) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 4]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 3]; @@ -522,8 +522,8 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); auto field = (*column_to)[column_to->size() - 5]; ASSERT_EQ(field, Field("44")); @@ -620,7 +620,7 @@ TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow) ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); 
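// (Same overflow contract for the arena round-trip: column_to cannot grow past the
// variant limit, so Float64 is never added and the value is expected to come back
// through the shared String variant checked below.)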
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); } diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h index 9fc727fd9c8..bd3d822fbb6 100644 --- a/src/DataTypes/DataTypeDynamic.h +++ b/src/DataTypes/DataTypeDynamic.h @@ -2,9 +2,6 @@ #include -#define DEFAULT_MAX_DYNAMIC_TYPES 32 - - namespace DB { @@ -46,6 +43,8 @@ public: size_t getMaxDynamicTypes() const { return max_dynamic_types; } private: + static constexpr size_t DEFAULT_MAX_DYNAMIC_TYPES = 32; + SerializationPtr doGetDefaultSerialization() const override; String doGetName() const override; diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 5e6106f560f..d0ecc3b80a2 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -118,7 +118,12 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( for (size_t i = 0; i != variant_info.variant_names.size(); ++i) { size_t size = 0; - /// Use statistics from column if it was created during merge. + /// Check if we can use statistics stored in the column. There are 2 possible sources + /// of this statistics: + /// - statistics calculated during merge of some data parts (Statistics::Source::MERGE) + /// - statistics read from the data part during deserialization of Dynamic column (Statistics::Source::READ). + /// We can rely only on statistics calculated during the merge, because column with statistics that was read + /// during deserialization from some data part could be filtered/limited/transformed/etc and so the statistics can be outdated. if (!statistics.data.empty() && statistics.source == ColumnDynamic::Statistics::Source::MERGE) size = statistics.data.at(variant_info.variant_names[i]); /// Otherwise we can use only variant sizes from current column. diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index b01643a9532..910168d8010 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -575,7 +575,7 @@ ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) template struct ConvertImplGenericToString { - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const FormatSettings & format_settings) { static_assert(std::is_same_v || std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); @@ -596,7 +596,6 @@ struct ConvertImplGenericToString auto & write_buffer = write_helper.getWriteBuffer(); - FormatSettings format_settings; auto serialization = type.getDefaultSerialization(); for (size_t row = 0; row < size; ++row) { @@ -2299,7 +2298,7 @@ private: if constexpr (std::is_same_v) { if (from_type->getCustomSerialization()) - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? 
getFormatSettings(context) : FormatSettings()); } bool done = false; @@ -2332,7 +2331,7 @@ private: /// Generic conversion of any type to String. if (std::is_same_v) { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", @@ -5060,7 +5059,7 @@ private: { ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); }; return true; } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 6c8e662477d..128854e87ba 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -552,7 +552,11 @@ BlockIO InterpreterInsertQuery::execute() { /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && !isDynamic(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); } } diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index f5e4c88fcd0..07ee8f4ddef 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -31,7 +31,13 @@ CollapsingSortedAlgorithm::CollapsingSortedAlgorithm( LoggerPtr log_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs, std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) + : IMergingAlgorithmWithSharedChunks( + header_, + num_inputs, + std::move(description_), + out_row_sources_buf_, + max_row_refs, + std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) , sign_column_number(header_.getPositionByName(sign_column)) , only_positive_sign(only_positive_sign_) , log(log_) From 4f1a97644ef6a6f462c01a0fb4046d07448d1d8c Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 11:34:16 +0000 Subject: [PATCH 136/651] Use nested column properly in SerializationSparse::enumerateStreams --- src/DataTypes/Serializations/SerializationSparse.cpp | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index f9228069b90..73488d308bb 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -170,7 +170,7 @@ void SerializationSparse::enumerateStreams( auto next_data = SubstreamData(nested) .withType(data.type) - .withColumn(column_sparse ? column_sparse->getValuesPtr() : nullptr) + .withColumn(column_sparse ? column_sparse->getValuesPtr() : data.column) .withSerializationInfo(data.serialization_info); nested->enumerateStreams(settings, callback, next_data); From fa5898a3cd5a9b4276eb75e39c4475dfdf722e3b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 10 May 2024 13:46:56 +0200 Subject: [PATCH 137/651] Refactor data part writer --- src/Storages/MergeTree/IMergeTreeDataPart.h | 21 ++-- .../MergeTree/IMergeTreeDataPartWriter.cpp | 119 +++++++++++++++++- .../MergeTree/IMergeTreeDataPartWriter.h | 57 ++++++++- .../MergeTree/IMergedBlockOutputStream.cpp | 17 ++- .../MergeTree/IMergedBlockOutputStream.h | 15 ++- src/Storages/MergeTree/MergeTask.cpp | 3 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 48 ++++--- .../MergeTree/MergeTreeDataPartCompact.h | 17 +-- .../MergeTree/MergeTreeDataPartWide.cpp | 18 ++- .../MergeTree/MergeTreeDataPartWide.h | 17 +-- .../MergeTreeDataPartWriterCompact.cpp | 27 ++-- .../MergeTreeDataPartWriterCompact.h | 9 +- .../MergeTreeDataPartWriterOnDisk.cpp | 32 +++-- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 9 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 69 +++++----- .../MergeTree/MergeTreeDataPartWriterWide.h | 9 +- .../MergeTree/MergeTreeDataWriter.cpp | 4 +- src/Storages/MergeTree/MergeTreePartition.cpp | 13 +- src/Storages/MergeTree/MergeTreePartition.h | 4 +- .../MergeTree/MergedBlockOutputStream.cpp | 29 +++-- .../MergeTree/MergedBlockOutputStream.h | 2 +- .../MergedColumnOnlyOutputStream.cpp | 11 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- 24 files changed, 409 insertions(+), 145 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index ba2ff2ed6fe..4ec5b3f5f8a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -74,7 +74,7 @@ public: using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; - using MergeTreeWriterPtr = std::unique_ptr; +// using MergeTreeWriterPtr = std::unique_ptr; using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; @@ -106,15 +106,16 @@ public: const ValueSizeMap & avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; - virtual MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) = 0; +//// virtual MergeTreeWriterPtr getWriter( +//// const NamesAndTypesList & columns_list, +//// const StorageMetadataPtr & metadata_snapshot, +//// const std::vector & indices_to_recalc, +//// const Statistics & stats_to_recalc_, +//// const CompressionCodecPtr & default_codec_, +//// const MergeTreeWriterSettings & 
writer_settings, +//// const MergeTreeIndexGranularity & computed_index_granularity) = 0; +// TODO: remove? virtual bool isStoredOnDisk() const = 0; virtual bool isStoredOnRemoteDisk() const = 0; @@ -168,6 +169,8 @@ public: const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; } + const SerializationByName & getSerializations() const { return serializations; } + SerializationPtr getSerialization(const String & column_name) const; SerializationPtr tryGetSerialization(const String & column_name) const; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 2488c63e309..c67e148d011 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,8 +1,15 @@ #include +#include "Storages/MergeTree/MergeTreeSettings.h" namespace DB { +namespace ErrorCodes +{ + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + + Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation) { Block result; @@ -38,13 +45,23 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per } IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : data_part(data_part_) - , storage(data_part_->storage) + : data_part_name(data_part_name_) + , serializations(serializations_) + , data_part_storage(data_part_storage_) + , index_granularity_info(index_granularity_info_) + + , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) , columns_list(columns_list_) , settings(settings_) @@ -60,6 +77,102 @@ Columns IMergeTreeDataPartWriter::releaseIndexColumns() std::make_move_iterator(index_columns.end())); } +SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const +{ + auto it = serializations.find(column_name); + if (it == serializations.end()) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no column or subcolumn {} in part {}", column_name, data_part_name); + + return it->second; +} + +ASTPtr IMergeTreeDataPartWriter::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec ? column_desc.codec : default_codec->getFullCodecDesc(); + }; + + const auto & columns = metadata_snapshot->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + +///// TODO: is this needed? 
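// (The branch below is presumably commented out because the writer no longer has
// access to the storage's virtual column descriptions after this refactor; only the
// table metadata codecs are consulted.)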
+// if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) +// return get_codec_or_default(*virtual_desc); +// + return default_codec->getFullCodecDesc(); +} + + IMergeTreeDataPartWriter::~IMergeTreeDataPartWriter() = default; + +MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + + + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( + MergeTreeDataPartType part_type, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) +{ + if (part_type == MergeTreeDataPartType::Compact) + return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + else if (part_type == MergeTreeDataPartType::Wide) + return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString()); +} + } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 3f359904ddd..ec04fd5f8a8 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -7,6 +7,8 
@@ #include #include #include +#include "Storages/MergeTree/MergeTreeDataPartType.h" +#include "Storages/MergeTree/MergeTreeSettings.h" namespace DB @@ -22,7 +24,15 @@ class IMergeTreeDataPartWriter : private boost::noncopyable { public: IMergeTreeDataPartWriter( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + + const String & data_part_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_, @@ -39,10 +49,30 @@ public: Columns releaseIndexColumns(); const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; } + SerializationPtr getSerialization(const String & column_name) const; + + ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + + IDataPartStorage & getDataPartStorage() { return *data_part_storage; } + protected: - const MergeTreeMutableDataPartPtr data_part; - const MergeTreeData & storage; +// const MergeTreeMutableDataPartPtr data_part; // TODO: remove + + /// Serializations for every columns and subcolumns by their names. + String data_part_name; + SerializationByName serializations; + MutableDataPartStoragePtr data_part_storage; + MergeTreeIndexGranularityInfo index_granularity_info; + + +// const MergeTreeData & storage; // TODO: remove + + const MergeTreeSettingsPtr storage_settings; + const size_t low_cardinality_max_dictionary_size = 0; // TODO: pass it in ctor + const bool low_cardinality_use_single_dictionary_for_part = true; // TODO: pass it in ctor + + const StorageMetadataPtr metadata_snapshot; const NamesAndTypesList columns_list; const MergeTreeWriterSettings settings; @@ -52,4 +82,25 @@ protected: MutableColumns index_columns; }; +using MergeTreeDataPartWriterPtr = std::unique_ptr; + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( + MergeTreeDataPartType part_type, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + + } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index c8d6aa0ba65..f99adf7c4db 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -2,25 +2,30 @@ #include #include #include +#include "Storages/MergeTree/IDataPartStorage.h" +#include "Storages/StorageSet.h" namespace DB { IMergedBlockOutputStream::IMergedBlockOutputStream( - const MergeTreeMutableDataPartPtr & data_part, +// const MergeTreeMutableDataPartPtr & data_part, + const MergeTreeSettingsPtr & storage_settings_, + MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & 
columns_list, bool reset_columns_) - : storage(data_part->storage) + //: storage(data_part->storage) + : storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) - , data_part_storage(data_part->getDataPartStoragePtr()) + , data_part_storage(data_part_storage_)//data_part->getDataPartStoragePtr()) , reset_columns(reset_columns_) { if (reset_columns) { SerializationInfo::Settings info_settings = { - .ratio_of_defaults_for_sparse = storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization,//storage.getSettings()->ratio_of_defaults_for_sparse_serialization, .choose_kind = false, }; @@ -42,7 +47,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( return {}; for (const auto & column : empty_columns) - LOG_TRACE(storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name); + LOG_TRACE(data_part->storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name); /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. std::map stream_counts; @@ -91,7 +96,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( } else /// If we have no file in checksums it doesn't exist on disk { - LOG_TRACE(storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr); + LOG_TRACE(data_part->storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr); itr = remove_files.erase(itr); } } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index ca4e3899b29..b6f279e6d58 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,10 +1,12 @@ #pragma once #include "Storages/MergeTree/IDataPartStorage.h" +#include "Storages/MergeTree/MergeTreeSettings.h" #include #include #include #include +#include "Common/Logger.h" namespace DB { @@ -13,7 +15,9 @@ class IMergedBlockOutputStream { public: IMergedBlockOutputStream( - const MergeTreeMutableDataPartPtr & data_part, +// const MergeTreeMutableDataPartPtr & data_part, + const MergeTreeSettingsPtr & storage_settings_, + MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_); @@ -39,11 +43,16 @@ protected: SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums); - const MergeTreeData & storage; +// const MergeTreeData & storage; // TODO: remove +//// + MergeTreeSettingsPtr storage_settings; + LoggerPtr log; +//// + StorageMetadataPtr metadata_snapshot; MutableDataPartStoragePtr data_part_storage; - IMergeTreeDataPart::MergeTreeWriterPtr writer; + MergeTreeDataPartWriterPtr writer; bool reset_columns = false; SerializationInfoByName new_serialization_infos; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 34e17e40a74..1b5ad0d81a7 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include namespace DB @@ -378,7 +379,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), 
MergeTreeStatisticsFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()), ctx->compression_codec, - global_ctx->txn, + global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, ctx->blocks_are_granules_size, global_ctx->context->getWriteSettings()); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 440c62213a3..8a96e4c9f04 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8423,7 +8423,7 @@ std::pair MergeTreeData::createE MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), Statistics{}, - compression_codec, txn); + compression_codec, txn ? txn->tid : Tx::PrehistoricTID); bool sync_on_insert = settings->fsync_after_insert; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 418b2d8f81b..eebbe3110c0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -47,27 +47,37 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( avg_value_size_hints, profile_callback, CLOCK_MONOTONIC_COARSE); } -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) +MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) { - NamesAndTypesList ordered_columns_list; - std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), - [this](const auto & column) { return getColumnPosition(column.name) != std::nullopt; }); - - /// Order of writing is important in compact format - ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) - { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); - +////// TODO: fix the order of columns +//// +//// NamesAndTypesList ordered_columns_list; +//// std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), +//// [this](const auto & column) { return getColumnPosition(column.name) != std::nullopt; }); +//// +//// /// Order of writing is important in compact format +//// ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) +//// { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); +//// return std::make_unique( - shared_from_this(), ordered_columns_list, metadata_snapshot, - indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), - default_codec_, 
writer_settings, computed_index_granularity); + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 3a4e7b95f33..5a57d778b7d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -40,15 +40,16 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; +// MergeTreeWriterPtr getWriter( +// const NamesAndTypesList & columns_list, +// const StorageMetadataPtr & metadata_snapshot, +// const std::vector & indices_to_recalc, +// const Statistics & stats_to_recalc_, +// const CompressionCodecPtr & default_codec_, +// const MergeTreeWriterSettings & writer_settings, +// const MergeTreeIndexGranularity & computed_index_granularity) override; +// TODO: remove? bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index fc3108e522a..c99cff258e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -53,20 +53,26 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( profile_callback); } -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( +MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, + const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { - return std::make_unique( - shared_from_this(), columns_list, - metadata_snapshot, indices_to_recalc, stats_to_recalc_, - getMarksFileExtension(), - default_codec_, writer_settings, computed_index_granularity); + return std::make_unique(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 84eeec4211b..45d0fbbebec 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -35,15 +35,16 @@ 
public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; +// MergeTreeWriterPtr getWriter( +// const NamesAndTypesList & columns_list, +// const StorageMetadataPtr & metadata_snapshot, +// const std::vector & indices_to_recalc, +// const Statistics & stats_to_recalc_, +// const CompressionCodecPtr & default_codec_, +// const MergeTreeWriterSettings & writer_settings, +// const MergeTreeIndexGranularity & computed_index_granularity) override; +// TODO: remove? bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1605e5cdb9a..6e8ea1a915b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -10,7 +10,14 @@ namespace ErrorCodes } MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -19,23 +26,26 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, + : MergeTreeDataPartWriterOnDisk( + data_part_name_, logger_name_, serializations_, + data_part_storage_, index_granularity_info_, storage_settings_, + columns_list_, metadata_snapshot_, indices_to_recalc_, stats_to_recalc, marks_file_extension_, default_codec_, settings_, index_granularity_) - , plain_file(data_part_->getDataPartStorage().writeFile( + , plain_file(getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, settings.max_compress_block_size, settings_.query_write_settings)) , plain_hashing(*plain_file) { - marks_file = data_part_->getDataPartStorage().writeFile( + marks_file = getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, 4096, settings_.query_write_settings); marks_file_hashing = std::make_unique(*marks_file); - if (data_part_->index_granularity_info.mark_type.compressed) + if (index_granularity_info.mark_type.compressed) { marks_compressor = std::make_unique( *marks_file_hashing, @@ -45,10 +55,9 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - auto compression = 
storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + auto compression = getCodecDescOrDefault(column.name, default_codec); addStreams(column, compression); } } @@ -81,7 +90,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, compressed_streams.emplace(stream_name, stream); }; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + getSerialization(column.name)->enumerateStreams(callback, column.type); } namespace @@ -230,7 +239,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeBinaryLittleEndian(static_cast(0), marks_out); writeColumnSingleGranule( - block.getByName(name_and_type->name), data_part->getSerialization(name_and_type->name), + block.getByName(name_and_type->name), getSerialization(name_and_type->name), stream_getter, granule.start_row, granule.rows_to_write); /// Each type always have at least one substream diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index ddb6178dce6..3bec4c7e988 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -11,7 +11,14 @@ class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterCompact( - const MergeTreeMutableDataPartPtr & data_part, +// const MergeTreeMutableDataPartPtr & data_part, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 491d2399b82..13892c17577 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -140,7 +140,13 @@ void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(Merg MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeIndices & indices_to_recalc_, @@ -149,7 +155,9 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : IMergeTreeDataPartWriter(data_part_, columns_list_, metadata_snapshot_, settings_, index_granularity_) + : IMergeTreeDataPartWriter( + data_part_name_, serializations_, data_part_storage_, index_granularity_info_, + storage_settings_, columns_list_, metadata_snapshot_, settings_, index_granularity_) , skip_indices(indices_to_recalc_) , stats(stats_to_recalc_) , marks_file_extension(marks_file_extension_) @@ -157,14 +165,14 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( , compute_granularity(index_granularity.empty()) , 
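// (storage_settings is now a member injected through the constructor as a
// MergeTreeSettingsPtr, so the writer no longer reaches back into the MergeTreeData
// object to pick the granularity settings.)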
compress_primary_key(settings.compress_primary_key) , execution_stats(skip_indices.size(), stats.size()) - , log(getLogger(storage.getLogName() + " (DataPartWriter)")) + , log(getLogger(logger_name_ + " (DataPartWriter)")) { if (settings.blocks_are_granules_size && !index_granularity.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't take information about index granularity from blocks, when non empty index_granularity array specified"); - if (!data_part->getDataPartStorage().exists()) - data_part->getDataPartStorage().createDirectories(); + if (!getDataPartStorage().exists()) + getDataPartStorage().createDirectories(); if (settings.rewrite_primary_key) initPrimaryIndex(); @@ -223,7 +231,7 @@ static size_t computeIndexGranularityImpl( size_t MergeTreeDataPartWriterOnDisk::computeIndexGranularity(const Block & block) const { - const auto storage_settings = storage.getSettings(); +// const auto storage_settings = storage.getSettings(); return computeIndexGranularityImpl( block, storage_settings->index_granularity_bytes, @@ -237,7 +245,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() if (metadata_snapshot->hasPrimaryKey()) { String index_name = "primary" + getIndexExtension(compress_primary_key); - index_file_stream = data_part->getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); + index_file_stream = getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); index_file_hashing_stream = std::make_unique(*index_file_stream); if (compress_primary_key) @@ -256,7 +264,7 @@ void MergeTreeDataPartWriterOnDisk::initStatistics() String stats_name = stat_ptr->getFileName(); stats_streams.emplace_back(std::make_unique>( stats_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stats_name, STAT_FILE_SUFFIX, default_codec, settings.max_compress_block_size, settings.query_write_settings)); @@ -275,7 +283,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() skip_indices_streams.emplace_back( std::make_unique>( stream_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stream_name, skip_index->getSerializedFileExtension(), stream_name, marks_file_extension, default_codec, settings.max_compress_block_size, @@ -285,7 +293,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() GinIndexStorePtr store = nullptr; if (typeid_cast(&*skip_index) != nullptr) { - store = std::make_shared(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment); + store = std::make_shared(stream_name, data_part_storage, data_part_storage, /*storage.getSettings()*/storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); @@ -498,7 +506,7 @@ void MergeTreeDataPartWriterOnDisk::finishStatisticsSerialization(bool sync) } for (size_t i = 0; i < stats.size(); ++i) - LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part->name); + LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part_name); } void MergeTreeDataPartWriterOnDisk::fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums) @@ -524,7 +532,7 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync) 
store.second->finalize(); for (size_t i = 0; i < skip_indices.size(); ++i) - LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part->name); + LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part_name); gin_index_stores.clear(); skip_indices_streams.clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 9f2cc3970fa..39f33217b57 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -104,7 +104,14 @@ public: using StatisticStreamPtr = std::unique_ptr>; MergeTreeDataPartWriterOnDisk( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 6a3b08d4d65..1f68a9d31a1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -76,7 +76,14 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, } MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -85,14 +92,16 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, - indices_to_recalc_, stats_to_recalc_, marks_file_extension_, - default_codec_, settings_, index_granularity_) + : MergeTreeDataPartWriterOnDisk( + data_part_name_, logger_name_, serializations_, + data_part_storage_, index_granularity_info_, storage_settings_, + columns_list_, metadata_snapshot_, + indices_to_recalc_, stats_to_recalc_, marks_file_extension_, + default_codec_, settings_, index_granularity_) { - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + auto compression = getCodecDescOrDefault(column.name, default_codec); addStreams(column, compression); } } @@ -105,7 +114,7 @@ void MergeTreeDataPartWriterWide::addStreams( { assert(!substream_path.empty()); - auto storage_settings = storage.getSettings(); +// auto storage_settings = storage.getSettings(); auto 
full_stream_name = ISerialization::getFileNameForStream(column, substream_path); String stream_name; @@ -149,7 +158,7 @@ void MergeTreeDataPartWriterWide::addStreams( column_streams[stream_name] = std::make_unique>( stream_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stream_name, DATA_FILE_EXTENSION, stream_name, marks_file_extension, compression_codec, @@ -163,7 +172,7 @@ void MergeTreeDataPartWriterWide::addStreams( }; ISerialization::SubstreamPath path; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + getSerialization(column.name)->enumerateStreams(callback, column.type); } const String & MergeTreeDataPartWriterWide::getStreamName( @@ -264,7 +273,7 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm { auto & column = block_to_write.getByName(it->name); - if (data_part->getSerialization(it->name)->getKind() != ISerialization::Kind::SPARSE) + if (getSerialization(it->name)->getKind() != ISerialization::Kind::SPARSE) column.column = recursiveRemoveSparse(column.column); if (permutation) @@ -334,7 +343,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( min_compress_block_size = value->safeGet(); if (!min_compress_block_size) min_compress_block_size = settings.min_compress_block_size; - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; auto stream_name = getStreamName(column, substream_path); @@ -368,7 +377,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( ISerialization::SerializeBinaryBulkSettings & serialize_settings, const Granule & granule) { - const auto & serialization = data_part->getSerialization(name_and_type.name); + const auto & serialization = getSerialization(name_and_type.name); serialization->serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.rows_to_write, serialize_settings, serialization_state); /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. 
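Taken together, the hunks in this patch turn every data_part-> access inside the writers into a use of one of the explicitly injected dependencies. A minimal sketch of how a caller would construct a writer through the new free factory (the parameter list follows the declaration added to IMergeTreeDataPartWriter.h above; the part accessors on the right-hand side are illustrative assumptions about the usual IMergeTreeDataPart API, not code from this patch):

    auto writer = createMergeTreeDataPartWriter(
        part->getType(),                  /// dispatches to the Compact or Wide writer
        part->name,                       /// data_part_name_: used only for logs and error messages
        part->storage.getLogName(),       /// logger_name_
        part->getSerializations(),        /// SerializationByName, exposed on IMergeTreeDataPart by this patch
        part->getDataPartStoragePtr(),    /// where the .bin/.mrk streams are created
        part->index_granularity_info,
        part->storage.getSettings(),      /// MergeTreeSettingsPtr instead of a reference to the whole storage
        columns_list,
        metadata_snapshot,
        indices_to_recalc,
        stats_to_recalc,
        part->getMarksFileExtension(),
        default_codec,
        writer_settings,
        computed_index_granularity);

Removing getWriter() from IMergeTreeDataPart in favour of this factory appears to be what lets the merged output streams be constructed from a storage pointer and settings alone, without holding a MergeTreeMutableDataPartPtr.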
@@ -398,7 +407,7 @@ void MergeTreeDataPartWriterWide::writeColumn( const auto & [name, type] = name_and_type; auto [it, inserted] = serialization_states.emplace(name, nullptr); - auto serialization = data_part->getSerialization(name_and_type.name); + auto serialization = getSerialization(name_and_type.name); if (inserted) { @@ -407,11 +416,11 @@ void MergeTreeDataPartWriterWide::writeColumn( serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } - const auto & global_settings = storage.getContext()->getSettingsRef(); +// const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; for (const auto & granule : granules) { @@ -460,7 +469,7 @@ void MergeTreeDataPartWriterWide::writeColumn( void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePair & name_type) { const auto & [name, type] = name_type; - const auto & serialization = data_part->getSerialization(name_type.name); + const auto & serialization = getSerialization(name_type.name); if (!type->isValueRepresentedByNumber() || type->haveSubtypes() || serialization->getKind() != ISerialization::Kind::DEFAULT) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot validate column of non fixed type {}", type->getName()); @@ -470,21 +479,21 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai String bin_path = escaped_name + DATA_FILE_EXTENSION; /// Some columns may be removed because of ttl. Skip them. 
- if (!data_part->getDataPartStorage().exists(mrk_path)) + if (!getDataPartStorage().exists(mrk_path)) return; - auto mrk_file_in = data_part->getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); + auto mrk_file_in = getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); std::unique_ptr mrk_in; - if (data_part->index_granularity_info.mark_type.compressed) + if (index_granularity_info.mark_type.compressed) mrk_in = std::make_unique(std::move(mrk_file_in)); else mrk_in = std::move(mrk_file_in); - DB::CompressedReadBufferFromFile bin_in(data_part->getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); + DB::CompressedReadBufferFromFile bin_in(getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; - UInt64 index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; + UInt64 index_granularity_rows = index_granularity_info.fixed_index_granularity; size_t mark_num; @@ -500,7 +509,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (settings.can_use_adaptive_granularity) readBinaryLittleEndian(index_granularity_rows, *mrk_in); else - index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; + index_granularity_rows = index_granularity_info.fixed_index_granularity; if (must_be_last) { @@ -533,7 +542,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", - data_part->getDataPartStorage().getFullPath(), + getDataPartStorage().getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); @@ -596,10 +605,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) { - const auto & global_settings = storage.getContext()->getSettingsRef(); +// const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { @@ -622,7 +631,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum if (!serialization_states.empty()) { serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? 
*written_offset_columns : offset_columns); - data_part->getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); + getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); } if (write_final_mark) @@ -665,7 +674,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync) { if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes() - && data_part->getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT) + && getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT) { validateColumnOfFixedSize(column); } @@ -708,7 +717,7 @@ void MergeTreeDataPartWriterWide::writeFinalMark( { writeSingleMark(column, offset_columns, 0); /// Memoize information about offsets - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index f5ff323563d..ef9c4ab17dc 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -21,7 +21,14 @@ class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterWide( - const MergeTreeMutableDataPartPtr & data_part, +// const MergeTreeMutableDataPartPtr & data_part, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index daa163d741c..0f05c171230 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -600,7 +600,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( indices, MergeTreeStatisticsFactory::instance().getMany(metadata_snapshot->getColumns()), compression_codec, - context->getCurrentTransaction(), + context->getCurrentTransaction() ? context->getCurrentTransaction()->tid : Tx::PrehistoricTID, false, false, context->getWriteSettings()); @@ -738,7 +738,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( MergeTreeIndices{}, Statistics{}, /// TODO(hanfei): It should be helpful to write statistics for projection result. 
compression_codec, - NO_TRANSACTION_PTR, + Tx::PrehistoricTID, false, false, data.getContext()->getWriteSettings()); out->writeWithPermutation(block, perm_ptr); diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index ddeaf69136a..c2ef7f98388 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -12,6 +12,7 @@ #include #include #include +#include "Interpreters/Context_fwd.h" #include #include @@ -413,12 +414,14 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file, {}); } -std::unique_ptr MergeTreePartition::store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const +std::unique_ptr MergeTreePartition::store(/*const MergeTreeData & storage,*/ + StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, + IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const { - auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - const auto & context = storage.getContext(); - const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block; - return store(partition_key_sample, data_part_storage, checksums, context->getWriteSettings()); +// auto metadata_snapshot = storage.getInMemoryMetadataPtr(); +// const auto & context = storage.getContext(); + const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage_context).sample_block; + return store(partition_key_sample, data_part_storage, checksums, storage_context->getWriteSettings()); } std::unique_ptr MergeTreePartition::store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 78b141f26ec..04175d6f927 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -44,7 +44,9 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. 
- [[nodiscard]] std::unique_ptr store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; + [[nodiscard]] std::unique_ptr store(//const MergeTreeData & storage, + StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, + IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; void assign(const MergeTreePartition & other) { value = other.value; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 9f641fd8eb5..2441d941952 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -21,35 +21,40 @@ MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeIndices & skip_indices, const Statistics & statistics, CompressionCodecPtr default_codec_, - const MergeTreeTransactionPtr & txn, + TransactionID tid, bool reset_columns_, bool blocks_are_granules_size, const WriteSettings & write_settings_, const MergeTreeIndexGranularity & computed_index_granularity) - : IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, reset_columns_) , columns_list(columns_list_) , default_codec(default_codec_) , write_settings(write_settings_) { MergeTreeWriterSettings writer_settings( - storage.getContext()->getSettings(), + data_part->storage.getContext()->getSettings(), write_settings, - storage.getSettings(), + storage_settings, data_part->index_granularity_info.mark_type.adaptive, /* rewrite_primary_key = */ true, blocks_are_granules_size); +// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart if (data_part->isStoredOnDisk()) data_part_storage->createDirectories(); - /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. - TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; +// /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. +// TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; /// NOTE do not pass context for writing to system.transactions_info_log, /// because part may have temporary name (with temporary block numbers). Will write it later. data_part->version.setCreationTID(tid, nullptr); data_part->storeVersionMetadata(); - writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); + writer = createMergeTreeDataPartWriter(data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, + columns_list, metadata_snapshot, skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); } /// If data is pre-sorted. 
@@ -208,7 +213,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (new_part->isProjectionPart()) { - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) + if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { auto count_out = new_part->getDataPartStorage().writeFile("count.txt", 4096, write_settings); HashingWriteBuffer count_out_hashing(*count_out); @@ -234,14 +239,16 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis written_files.emplace_back(std::move(out)); } - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - if (auto file = new_part->partition.store(storage, new_part->getDataPartStorage(), checksums)) + if (auto file = new_part->partition.store(//storage, + new_part->storage.getInMemoryMetadataPtr(), new_part->storage.getContext(), + new_part->getDataPartStorage(), checksums)) written_files.emplace_back(std::move(file)); if (new_part->minmax_idx->initialized) { - auto files = new_part->minmax_idx->store(storage, new_part->getDataPartStorage(), checksums); + auto files = new_part->minmax_idx->store(new_part->storage, new_part->getDataPartStorage(), checksums); for (auto & file : files) written_files.emplace_back(std::move(file)); } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 540b3b3bffa..c1e3d75fefc 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -22,7 +22,7 @@ public: const MergeTreeIndices & skip_indices, const Statistics & statistics, CompressionCodecPtr default_codec_, - const MergeTreeTransactionPtr & txn, + TransactionID tid, bool reset_columns_ = false, bool blocks_are_granules_size = false, const WriteSettings & write_settings = {}, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 728b2e38833..51853384012 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -20,11 +20,11 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(data_part, metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) , header(header_) { const auto & global_settings = data_part->storage.getContext()->getSettings(); - const auto & storage_settings = data_part->storage.getSettings(); +// const auto & storage_settings = data_part->storage.getSettings(); MergeTreeWriterSettings writer_settings( global_settings, @@ -33,11 +33,16 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( index_granularity_info ? 
index_granularity_info->mark_type.adaptive : data_part->storage.canUseAdaptiveGranularity(), /* rewrite_primary_key = */ false); - writer = data_part->getWriter( + writer = createMergeTreeDataPartWriter( + data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, header.getNamesAndTypesList(), metadata_snapshot_, indices_to_recalc, stats_to_recalc_, + data_part->getMarksFileExtension(), default_codec, writer_settings, index_granularity); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 55d845dfbb9..54077055d96 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1660,7 +1660,7 @@ private: skip_indices, stats_to_rewrite, ctx->compression_codec, - ctx->txn, + ctx->txn ? ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, /*blocks_are_granules_size=*/ false, ctx->context->getWriteSettings(), From 32b8aba8ef1bf9a0b890065a5d719a002cee8bb5 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 10 May 2024 14:12:34 +0200 Subject: [PATCH 138/651] Style --- src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index c67e148d011..b46fbc5fc9e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int NO_SUCH_COLUMN_IN_TABLE; } @@ -144,7 +145,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeIndexGranularity & computed_index_granularity); - MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( MergeTreeDataPartType part_type, const String & data_part_name_, From 60c721c21b645bad32dbe361b502e9132474793a Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 12:20:27 +0000 Subject: [PATCH 139/651] Fix build after conflict resolution --- src/Functions/FunctionsConversion.cpp | 3 ++- src/Storages/MergeTree/MergeTreeReaderWide.cpp | 11 +++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 90703947182..8f5d11b05ee 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -5057,7 +5058,7 @@ private: } else if (from_type->getCustomSerialization()) { - ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + ret = [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? 
getFormatSettings(context) : FormatSettings()); }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 9468cffd25d..b7eefab112c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -249,7 +249,7 @@ MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const auto marks_loader = std::make_shared( data_part_info_for_read, mark_cache, - data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), + data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(stream_name), num_marks_in_part, data_part_info_for_read->getIndexGranularityInfo(), settings.save_marks_in_cache, @@ -257,24 +257,23 @@ MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const load_marks_threadpool, /*num_columns_in_mark=*/ 1); - has_any_stream = true; auto stream_settings = settings; stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; auto create_stream = [&]() { return std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, + data_part_info_for_read->getDataPartStorage(), stream_name, DATA_FILE_EXTENSION, num_marks_in_part, all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), std::move(marks_loader), profile_callback, clock_type); }; if (read_without_marks) - return streams.emplace(*stream_name, create_stream.operator()()); + return streams.emplace(stream_name, create_stream.operator()()).first; marks_loader->startAsyncLoad(); - return streams.emplace(*stream_name, create_stream.operator()()); + return streams.emplace(stream_name, create_stream.operator()()).first; } ReadBuffer * MergeTreeReaderWide::getStream( From fb20e80db417f63ed7a12036488accb9f418f261 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 13:23:19 +0000 Subject: [PATCH 140/651] Better test, fix style --- src/Functions/FunctionsConversion.cpp | 62 ++++++++++++------- .../MergeTree/MergeTreeReaderWide.cpp | 2 +- ...9_dynamic_all_merge_algorithms_2.reference | 20 +++--- .../03039_dynamic_all_merge_algorithms_2.sh | 8 +-- 4 files changed, 56 insertions(+), 36 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 8f5d11b05ee..5bb6fa065de 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -576,7 +576,7 @@ ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) template struct ConvertImplGenericToString { - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const FormatSettings & format_settings) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const ContextPtr & context) { static_assert(std::is_same_v || std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); @@ -597,6 +597,7 @@ struct ConvertImplGenericToString auto & write_buffer = write_helper.getWriteBuffer(); + FormatSettings format_settings = context ? 
getFormatSettings(context) : FormatSettings{}; auto serialization = type.getDefaultSerialization(); for (size_t row = 0; row < size; ++row) { @@ -1820,7 +1821,7 @@ struct ConvertImpl template struct ConvertImplGenericFromString { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) + static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count, const ContextPtr & context) { const IColumn & column_from = *arguments[0].column; const IDataType & data_type_to = *result_type; @@ -1828,7 +1829,7 @@ struct ConvertImplGenericFromString auto serialization = data_type_to.getDefaultSerialization(); const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get()); + executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get(), context); return res; } @@ -1838,11 +1839,12 @@ struct ConvertImplGenericFromString const ISerialization & serialization_from, size_t input_rows_count, const PaddedPODArray * null_map, - const IDataType * result_type) + const IDataType * result_type, + const ContextPtr & context) { column_to.reserve(input_rows_count); - FormatSettings format_settings; + FormatSettings format_settings = context ? getFormatSettings(context) : FormatSettings{}; for (size_t i = 0; i < input_rows_count; ++i) { if (null_map && (*null_map)[i]) @@ -2299,7 +2301,7 @@ private: if constexpr (std::is_same_v) { if (from_type->getCustomSerialization()) - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); } bool done = false; @@ -2332,7 +2334,7 @@ private: /// Generic conversion of any type to String. if (std::is_same_v) { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", @@ -3288,8 +3290,17 @@ private: if (checkAndGetDataType(from_type.get())) { if (cast_type == CastType::accurateOrNull) - return &ConvertImplGenericFromString::execute; - return &ConvertImplGenericFromString::execute; + { + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; + } + + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } return createWrapper(from_type, to_type, requested_result_is_nullable); @@ -3452,7 +3463,10 @@ private: /// Conversion from String through parsing. 
if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { @@ -3495,7 +3509,10 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } DataTypePtr from_type_holder; @@ -3586,7 +3603,10 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -3929,9 +3949,9 @@ private: } else if (checkAndGetDataType(from_type.get())) { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count, context)->assumeMutable(); res->finalize(); return res; }; @@ -4104,8 +4124,8 @@ private: args[0].type = removeNullable(removeLowCardinality(args[0].type)); if (cast_type == CastType::accurateOrNull) - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); }; } @@ -4265,8 +4285,8 @@ private: args[0].type = removeNullable(removeLowCardinality(args[0].type)); if (cast_type == CastType::accurateOrNull) - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); }; } @@ -5020,9 +5040,9 @@ private: wrapped_result_type = makeNullable(result_type); if (this->cast_type == CastType::accurateOrNull) return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, 
input_rows_count); + arguments, wrapped_result_type, column_nullable, input_rows_count, context); return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, input_rows_count); + arguments, wrapped_result_type, column_nullable, input_rows_count, context); }; return true; } @@ -5060,7 +5080,7 @@ private: { ret = [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); }; return true; } diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index b7eefab112c..b6882fdced9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -271,7 +271,7 @@ MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const if (read_without_marks) return streams.emplace(stream_name, create_stream.operator()()).first; - + marks_loader->startAsyncLoad(); return streams.emplace(stream_name, create_stream.operator()()).first; } diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference index 03c8b4564fa..af6c7d8d567 100644 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference @@ -2,8 +2,8 @@ MergeTree compact + horizontal merge CollapsingMergeTree 100000 String 100000 UInt64 -50000 UInt64 50000 String +50000 UInt64 VersionedCollapsingMergeTree 100000 String 100000 UInt64 @@ -11,34 +11,34 @@ VersionedCollapsingMergeTree 75000 UInt64 MergeTree wide + horizontal merge CollapsingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 String 50000 UInt64 VersionedCollapsingMergeTree -100000 UInt64 100000 String +100000 UInt64 75000 String 75000 UInt64 MergeTree compact + vertical merge CollapsingMergeTree -100000 UInt64 100000 String -50000 UInt64 +100000 UInt64 50000 String +50000 UInt64 VersionedCollapsingMergeTree -100000 UInt64 100000 String -75000 UInt64 +100000 UInt64 75000 String +75000 UInt64 MergeTree wide + vertical merge CollapsingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 String 50000 UInt64 VersionedCollapsingMergeTree -100000 UInt64 100000 String -75000 UInt64 +100000 UInt64 75000 String +75000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh index 5dae9228d0a..f067a99ca19 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh @@ -18,9 +18,9 @@ function test() $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by 
dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "drop table test" echo "VersionedCollapsingMergeTree" @@ -29,9 +29,9 @@ function test() $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "drop table test" } From b20d60858f1286a5e406e2c74036e6ad244fda2b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 10 May 2024 15:48:32 +0200 Subject: [PATCH 141/651] Pass low cardinality settings --- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 2 -- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 8 ++++---- src/Storages/MergeTree/MergeTreeIOSettings.h | 5 +++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index ec04fd5f8a8..52e21bed2f2 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -69,8 +69,6 @@ protected: // const MergeTreeData & storage; // TODO: remove const MergeTreeSettingsPtr storage_settings; - const size_t low_cardinality_max_dictionary_size = 0; // TODO: pass it in ctor - const bool low_cardinality_use_single_dictionary_for_part = true; // TODO: pass it in ctor const StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 1f68a9d31a1..713dee87fa8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -419,8 +419,8 @@ void MergeTreeDataPartWriterWide::writeColumn( // const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialize_settings.low_cardinality_max_dictionary_size = low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; for (const auto & granule : granules) { @@ -607,8 +607,8 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum { // const auto & global_settings = storage.getContext()->getSettingsRef(); 
ISerialization::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 12a83703148..421c62887da 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -74,6 +74,8 @@ struct MergeTreeWriterSettings , blocks_are_granules_size(blocks_are_granules_size_) , query_write_settings(query_write_settings_) , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) + , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) + , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part) { } @@ -93,6 +95,9 @@ struct MergeTreeWriterSettings WriteSettings query_write_settings; size_t max_threads_for_annoy_index_creation; + + size_t low_cardinality_max_dictionary_size; + bool low_cardinality_use_single_dictionary_for_part; }; } From cd3604f23543cbd07f650c1446d54606d06a81cf Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 14:14:17 +0000 Subject: [PATCH 142/651] Remove trailing whitespaces --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 5bb6fa065de..09d0025860a 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -3296,7 +3296,7 @@ private: return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); }; } - + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr { return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); From 5004c225831c1fa1cf8c213673148a1ca299d4e1 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Fri, 10 May 2024 15:25:21 +0200 Subject: [PATCH 143/651] Fix Array and Map support with Keyed hashing When working with materialized key columns and rows containing Arrays or Maps (implemented as Tuple's Arrays) with multiple values, the keyed hash functions were erroneously refusing to proceed, because they misinterpreted the output vector size. Close #61497 which was reported as a security issue, but it didn't actually have any security impact. The usefulness of keyed hashing over Maps is also questionable, but we support it for completeness. 
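The key columns hold one (key0, key1) pair per row, while hashing walks the flattened array elements, so a flattened element index has to be mapped back to its owning row before its key can be fetched. A minimal self-contained sketch of that lookup over ColumnArray-style cumulative offsets (names here are illustrative, not the actual ClickHouse code):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct SipHashKey { uint64_t key0 = 0; uint64_t key1 = 0; };

/// offsets[r] is one past the last flattened element of row r, so the first
/// offset strictly greater than the element index identifies the owning row.
/// Callers must pass element_index < offsets.back().
SipHashKey keyForElement(
    const std::vector<uint64_t> & offsets,
    const std::vector<SipHashKey> & row_keys,
    uint64_t element_index)
{
    auto it = std::upper_bound(offsets.begin(), offsets.end(), element_index);
    return row_keys[it - offsets.begin()];
}
```

The diff below applies the same idea inside SipHashKeyColumns::getKey, and reports the key set's size as offsets->back() (the total number of flattened elements) instead of the per-row key count.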
--- src/Functions/FunctionsHashing.h | 24 ++++++++++++++++++- .../0_stateless/02534_keyed_siphash.reference | 3 +++ .../0_stateless/02534_keyed_siphash.sql | 7 ++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 79b33e2f75b..bccdba5ee69 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -49,6 +49,8 @@ #include #include +#include + namespace DB { @@ -75,17 +77,29 @@ namespace impl ColumnPtr key0; ColumnPtr key1; bool is_const; + const ColumnArray::Offsets * offsets{}; size_t size() const { assert(key0 && key1); assert(key0->size() == key1->size()); + assert(offsets == nullptr || offsets->size() == key0->size()); + if (offsets != nullptr) + return offsets->back(); return key0->size(); } SipHashKey getKey(size_t i) const { if (is_const) i = 0; + if (offsets != nullptr) + { + const auto begin = offsets->begin(); + auto upper = std::upper_bound(begin, offsets->end(), i); + if (upper == offsets->end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i); + i = upper - begin; + } const auto & key0data = assert_cast(*key0).getData(); const auto & key1data = assert_cast(*key1).getData(); return {key0data[i], key1data[i]}; @@ -1112,7 +1126,15 @@ private: typename ColumnVector::Container vec_temp(nested_size); bool nested_is_first = true; - executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); + + if constexpr (Keyed) + { + KeyColumnsType key_cols_tmp{key_cols}; + key_cols_tmp.offsets = &offsets; + executeForArgument(key_cols_tmp, nested_type, nested_column, vec_temp, nested_is_first); + } + else + executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); const size_t size = offsets.size(); diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index e3fae07333a..3f478218ff1 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -236,3 +236,6 @@ Check asan bug 0 Check bug found fuzzing 9042C6691B1A75F0EA3314B6F55728BB +Check bug 2 found fuzzing +608E1FF030C9E206185B112C2A25F1A7 +ABB65AE97711A2E053E324ED88B1D08B diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 112ae15bf46..fb707109c83 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -338,3 +338,10 @@ SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY s SELECT 'Check bug found fuzzing'; SELECT [(255, 1048575)], sipHash128ReferenceKeyed((toUInt64(2147483646), toUInt64(9223372036854775807)), ([(NULL, 100), (NULL, NULL), (1024, 10)], toUInt64(2), toUInt64(1024)), ''), hex(sipHash128ReferenceKeyed((-9223372036854775807, 1.), '-1', NULL)), ('', toUInt64(65535), [(9223372036854775807, 9223372036854775806)], toUInt64(65536)), arrayJoin((NULL, 65537, 255), [(NULL, NULL)]) GROUP BY tupleElement((NULL, NULL, NULL, -1), toUInt64(2), 2) = NULL; -- { serverError NOT_IMPLEMENTED } SELECT hex(sipHash128ReferenceKeyed((0::UInt64, 0::UInt64), ([1, 1]))); + +SELECT 'Check bug 2 found fuzzing'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (`a` Map(String, String)) ENGINE = Memory; +INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g':'h'}); +SELECT 
hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; +DROP TABLE sipHashKeyed_keys; From a3aff6939c0b3afeeb9e4ab9c6f2992a2c61b543 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 10 May 2024 19:21:16 +0200 Subject: [PATCH 144/651] Protected methods --- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 52e21bed2f2..6854668a01e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -49,13 +49,13 @@ public: Columns releaseIndexColumns(); const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; } +protected: SerializationPtr getSerialization(const String & column_name) const; ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; IDataPartStorage & getDataPartStorage() { return *data_part_storage; } -protected: // const MergeTreeMutableDataPartPtr data_part; // TODO: remove From 555e9ede91a7872e8f30173a05560bf9e83ec056 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 10 May 2024 20:19:32 +0200 Subject: [PATCH 145/651] Add anova documentation --- .../reference/analysis_of_variance.md | 44 +++++++++++++++++++ .../aggregate-functions/reference/index.md | 1 + 2 files changed, 45 insertions(+) create mode 100644 docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md diff --git a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md new file mode 100644 index 00000000000..7f0df74010b --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md @@ -0,0 +1,44 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/analysis_of_variance +sidebar_position: 6 +--- + +# analysisOfVariance + +Provides a statistical test for one-way analysis of variance (ANOVA test). It is a test over several groups of normally distributed observations to find out whether all groups have the same mean or not. + +**Syntax** + +```sql +analysisOfVariance(val, group_no) +``` + +Aliases: `anova` + +**Parameters** +- `val`: value. +- `group_no` : group number that `val` belongs to. + +:::note +Groups are enumerated starting from 0 and there should be at least two groups to perform a test. +Moreover there should be at least one group with the number of observations greater than one. +::: + +**Returned value** + +- `(f_statistic, p_value)`. 
[Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)) + +**Implementation details** + + +**Example** + +Query: + +```sql +``` + +Result: + +```response +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e9a7fe4fc2b..451ee2aae9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -37,6 +37,7 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: +- [analysisOfVariance](/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md) - [any](/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md) - [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md) - [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md) From 06f0780a3f4683ed591573086ac4d08eb78fd13d Mon Sep 17 00:00:00 2001 From: woodlzm Date: Fri, 10 May 2024 23:27:42 -0700 Subject: [PATCH 146/651] Add a build_id ALIAS column to trace_log to facilitate auto renaming upon detecting binary changes. --- src/Interpreters/TraceLog.cpp | 13 +++++++++++++ src/Interpreters/TraceLog.h | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 01bedf34f15..2c56eb79089 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -53,6 +54,18 @@ ColumnsDescription TraceLogElement::getColumnsDescription() }; } +NamesAndAliases TraceLogElement::getNamesAndAliases() +{ + String build_id_hex; +#if defined(__ELF__) && !defined(OS_FREEBSD) + build_id_hex = SymbolIndex::instance().getBuildIDHex(); +#endif + return + { + {"build_id", std::make_shared(), "\'" + build_id_hex + "\'"}, + }; +} + void TraceLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index 418b8d546a0..c4314cfd7b0 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -39,7 +39,7 @@ struct TraceLogElement static std::string name() { return "TraceLog"; } static ColumnsDescription getColumnsDescription(); - static NamesAndAliases getNamesAndAliases() { return {}; } + static NamesAndAliases getNamesAndAliases(); void appendToBlock(MutableColumns & columns) const; }; From 77a8a0ce98d5ea66fb8b76af7f89a245866ad315 Mon Sep 17 00:00:00 2001 From: woodlzm Date: Fri, 10 May 2024 23:29:07 -0700 Subject: [PATCH 147/651] Add stateless test for build_id column addition in trace_log. 
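The column is the ALIAS introduced in the previous commit, so it can also be checked by hand; a minimal sketch of the kind of query the test automates (assuming a build that embeds a build id):

```sql
-- Flush the in-memory log buffer first, then read the aliased column.
SYSTEM FLUSH LOGS;
SELECT DISTINCT build_id FROM system.trace_log;
```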
--- .../0_stateless/03150_trace_log_add_build_id.reference | 2 ++ tests/queries/0_stateless/03150_trace_log_add_build_id.sql | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/03150_trace_log_add_build_id.reference create mode 100644 tests/queries/0_stateless/03150_trace_log_add_build_id.sql diff --git a/tests/queries/0_stateless/03150_trace_log_add_build_id.reference b/tests/queries/0_stateless/03150_trace_log_add_build_id.reference new file mode 100644 index 00000000000..0d66ea1aee9 --- /dev/null +++ b/tests/queries/0_stateless/03150_trace_log_add_build_id.reference @@ -0,0 +1,2 @@ +0 +1 diff --git a/tests/queries/0_stateless/03150_trace_log_add_build_id.sql b/tests/queries/0_stateless/03150_trace_log_add_build_id.sql new file mode 100644 index 00000000000..1f7bf1c02de --- /dev/null +++ b/tests/queries/0_stateless/03150_trace_log_add_build_id.sql @@ -0,0 +1,6 @@ +SELECT sleep(1); + +SYSTEM FLUSH LOGS; + +SELECT COUNT(*) > 1 FROM system.trace_log WHERE build_id IS NOT NULL; + From 7d809cbe9bb25cf292faf5a314614d45f0c4b6de Mon Sep 17 00:00:00 2001 From: woodlzm Date: Fri, 10 May 2024 23:30:48 -0700 Subject: [PATCH 148/651] Add integration test for checking new build_id column in trace_log plus the table renaming behavior upon binary changes. --- .../test_trace_log_build_id/__init__.py | 0 .../test_trace_log_build_id/test.py | 69 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 tests/integration/test_trace_log_build_id/__init__.py create mode 100644 tests/integration/test_trace_log_build_id/test.py diff --git a/tests/integration/test_trace_log_build_id/__init__.py b/tests/integration/test_trace_log_build_id/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_trace_log_build_id/test.py b/tests/integration/test_trace_log_build_id/test.py new file mode 100644 index 00000000000..b4a49b2e4d3 --- /dev/null +++ b/tests/integration/test_trace_log_build_id/test.py @@ -0,0 +1,69 @@ +import pytest +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION + +TEST_QUERY_ID = "test_trace_log_build_id_query_{}" +OLD_TEST_QUERY_ID = TEST_QUERY_ID.format('0') +NEW_TEST_QUERY_ID = TEST_QUERY_ID.format('1') +ACTIVE_TRACE_LOG_TABLE = "trace_log" +RENAMED_TRACE_LOG_TABLE = "trace_log_0" + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + with_zookeeper=True, + image="clickhouse/clickhouse-server", + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, + stay_alive=True, + with_installed_binary=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + + +def test_trace_log_build_id(started_cluster): + # This test checks that build_id column of system_log.trace_log is non-empty, and gets renamed when binary version changes. + # We make queries to create entries in trace_log, then restart with new version and verify if the old + # trace_log table is renamed and a new trace_log table is created. 
+ + + query_for_table_name = "EXISTS TABLE system.{table}" + + node.query( + "SELECT sleep(1)", + query_id=OLD_TEST_QUERY_ID, + ) + node.query("SYSTEM FLUSH LOGS") + assert node.query(query_for_table_name.format(table=ACTIVE_TRACE_LOG_TABLE)) == "1\n" + assert node.query(query_for_table_name.format(table=RENAMED_TRACE_LOG_TABLE)) == "0\n" + + node.restart_with_latest_version() + + query_for_test_query_id = """ + SELECT EXISTS + ( + SELECT * + FROM system.{table} + WHERE query_id = \'{query_id}\' + ) + """ + node.query( + "SELECT sleep(1)", + query_id=NEW_TEST_QUERY_ID, + ) + node.query("SYSTEM FLUSH LOGS") + assert node.query(query_for_test_query_id.format(table=ACTIVE_TRACE_LOG_TABLE, query_id=OLD_TEST_QUERY_ID)) == "0\n" + assert node.query(query_for_test_query_id.format(table=ACTIVE_TRACE_LOG_TABLE, query_id=NEW_TEST_QUERY_ID)) == "1\n" + assert node.query(query_for_test_query_id.format(table=RENAMED_TRACE_LOG_TABLE, query_id=OLD_TEST_QUERY_ID)) == "1\n" + From 9d0ad7ba67b6855344512398b5f924bdad4ece9e Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 14 Jan 2024 11:25:12 +0800 Subject: [PATCH 149/651] original parquet reader Change-Id: I83a8ec8271edefcd96cb5b3bcd12f6b545d9dec0 --- .../Impl/Parquet/ParquetColumnReader.h | 29 + .../Formats/Impl/Parquet/ParquetDataBuffer.h | 179 ++++++ .../Impl/Parquet/ParquetDataValuesReader.cpp | 553 ++++++++++++++++++ .../Impl/Parquet/ParquetDataValuesReader.h | 263 +++++++++ .../Impl/Parquet/ParquetLeafColReader.cpp | 506 ++++++++++++++++ .../Impl/Parquet/ParquetLeafColReader.h | 63 ++ .../Impl/Parquet/ParquetRecordReader.cpp | 225 +++++++ .../Impl/Parquet/ParquetRecordReader.h | 48 ++ 8 files changed, 1866 insertions(+) create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h diff --git a/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h new file mode 100644 index 00000000000..cfd9d3ba5bd --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h @@ -0,0 +1,29 @@ +#pragma once + +#include + +namespace parquet +{ + +class PageReader; +class ColumnChunkMetaData; +class DataPageV1; +class DataPageV2; + +} + +namespace DB +{ + +class ParquetColumnReader +{ +public: + virtual ColumnWithTypeAndName readBatch(UInt32 rows_num, const String & name) = 0; + + virtual ~ParquetColumnReader() = default; +}; + +using ParquetColReaderPtr = std::unique_ptr; +using ParquetColReaders = std::vector; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h new file mode 100644 index 00000000000..1f83c74f9ad --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -0,0 +1,179 @@ +#pragma once + +#include + +#include +#include +#include + +namespace DB +{ + +template struct ToArrowDecimal; + +template <> struct ToArrowDecimal>> +{ + using ArrowDecimal = arrow::Decimal128; +}; + +template <> struct ToArrowDecimal>> +{ + using 
ArrowDecimal = arrow::Decimal256;
+};
+
+
+class ParquetDataBuffer
+{
+private:
+
+public:
+    ParquetDataBuffer(const uint8_t * data_, UInt64 available_, UInt8 datetime64_scale_ = DataTypeDateTime64::default_scale)
+        : data(reinterpret_cast<const Int8 *>(data_)), available(available_), datetime64_scale(datetime64_scale_) {}
+
+    template <typename TValue>
+    void ALWAYS_INLINE readValue(TValue & dst)
+    {
+        checkAvailable(sizeof(TValue));
+        dst = *reinterpret_cast<const TValue *>(data);
+        consume(sizeof(TValue));
+    }
+
+    void ALWAYS_INLINE readBytes(void * dst, size_t bytes)
+    {
+        checkAvailable(bytes);
+        memcpy(dst, data, bytes);
+        consume(bytes);
+    }
+
+    void ALWAYS_INLINE readDateTime64(DateTime64 & dst)
+    {
+        static const int max_scale_num = 9;
+        static const UInt64 pow10[max_scale_num + 1]
+            = {1000000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1};
+        static const UInt64 spd = 60 * 60 * 24;
+        static const UInt64 scaled_day[max_scale_num + 1]
+            = {spd,
+               10 * spd,
+               100 * spd,
+               1000 * spd,
+               10000 * spd,
+               100000 * spd,
+               1000000 * spd,
+               10000000 * spd,
+               100000000 * spd,
+               1000000000 * spd};
+
+        checkAvailable(sizeof(parquet::Int96));
+        auto decoded = parquet::DecodeInt96Timestamp(*reinterpret_cast<const parquet::Int96 *>(data));
+
+        uint64_t scaled_nano = decoded.nanoseconds / pow10[datetime64_scale];
+        dst = static_cast<Int64>(decoded.days_since_epoch * scaled_day[datetime64_scale] + scaled_nano);
+
+        consume(sizeof(parquet::Int96));
+    }
+
+    /**
+     * This method should only be used to read strings whose element sizes are small.
+     * Because memcpySmallAllowReadWriteOverflow15 instead of memcpy is used according to ColumnString::indexImpl
+     */
+    void ALWAYS_INLINE readString(ColumnString & column, size_t cursor)
+    {
+        // refer to: PlainByteArrayDecoder::DecodeArrowDense in encoding.cc
+        //           deserializeBinarySSE2 in SerializationString.cpp
+        checkAvailable(4);
+        auto value_len = ::arrow::util::SafeLoadAs<Int32>(getArrowData());
+        if (unlikely(value_len < 0 || value_len > INT32_MAX - 4))
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid or corrupted value_len '{}'", value_len);
+        }
+        consume(4);
+        checkAvailable(value_len);
+
+        auto chars_cursor = column.getChars().size();
+        column.getChars().resize(chars_cursor + value_len + 1);
+
+        memcpySmallAllowReadWriteOverflow15(&column.getChars()[chars_cursor], data, value_len);
+        column.getChars().back() = 0;
+
+        column.getOffsets().data()[cursor] = column.getChars().size();
+        consume(value_len);
+    }
+
+    template <typename TDecimal>
+    void ALWAYS_INLINE readOverBigDecimal(TDecimal * out, Int32 elem_bytes_num)
+    {
+        using TArrowDecimal = typename ToArrowDecimal<TDecimal>::ArrowDecimal;
+
+        checkAvailable(elem_bytes_num);
+
+        // refer to: RawBytesToDecimalBytes in reader_internal.cc, Decimal128::FromBigEndian in decimal.cc
+        auto status = TArrowDecimal::FromBigEndian(getArrowData(), elem_bytes_num);
+        if (unlikely(!status.ok()))
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Read parquet decimal failed: {}", status.status().ToString());
+        }
+        status.ValueUnsafe().ToBytes(reinterpret_cast<uint8_t *>(out));
+        consume(elem_bytes_num);
+    }
+
+private:
+    const Int8 * data;
+    UInt64 available;
+    const UInt8 datetime64_scale;
+
+    void ALWAYS_INLINE checkAvailable(UInt64 num)
+    {
+        if (unlikely(available < num))
+        {
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Consuming {} bytes while {} available", num, available);
+        }
+    }
+
+    const uint8_t * ALWAYS_INLINE getArrowData() { return reinterpret_cast<const uint8_t *>(data); }
+
+    void ALWAYS_INLINE consume(UInt64 num)
+    {
+        data += num;
+        available -= num;
+    }
+};
+
+
+class LazyNullMap
+{
+public:
+    explicit LazyNullMap(UInt32 size_) : size(size_), col_nullable(nullptr) {}
col_nullable(nullptr) {} + + void setNull(UInt32 cursor) + { + initialize(); + null_map[cursor] = 1; + } + + void setNull(UInt32 cursor, UInt32 count) + { + initialize(); + memset(null_map + cursor, 1, count); + } + + ColumnPtr getNullableCol() { return col_nullable; } + +private: + UInt32 size; + UInt8 * null_map; + ColumnPtr col_nullable; + + void initialize() + { + if (likely(col_nullable)) + { + return; + } + auto col = ColumnVector::create(size); + null_map = col->getData().data(); + col_nullable = std::move(col); + memset(null_map, 0, size); + } +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp new file mode 100644 index 00000000000..659a7a11969 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -0,0 +1,553 @@ +#include "ParquetDataValuesReader.h" + +#include +#include + +#include + +namespace DB +{ + +void RleValuesReader::nextGroup() +{ + // refer to: + // RleDecoder::NextCounts in rle_encoding.h and VectorizedRleValuesReader::readNextGroup in Spark + UInt32 indicator_value = 0; + [[maybe_unused]] auto read_res = bit_reader->GetVlqInt(&indicator_value); + assert(read_res); + + cur_group_is_packed = indicator_value & 1; + cur_group_size = indicator_value >> 1; + + if (cur_group_is_packed) + { + cur_group_size *= 8; + cur_packed_bit_values.resize(cur_group_size); + bit_reader->GetBatch(bit_width, cur_packed_bit_values.data(), cur_group_size); + } + else + { + cur_value = 0; + read_res = bit_reader->GetAligned((bit_width + 7) / 8, &cur_value); + assert(read_res); + } + cur_group_cursor = 0; + +} + +template +void RleValuesReader::visitValues( + UInt32 num_values, IndividualVisitor && individual_visitor, RepeatedVisitor && repeated_visitor) +{ + // refer to: VisitNullBitmapInline in visitor_inline.h + while (num_values) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + individual_visitor(cur_packed_bit_values[i]); + } + } + else + { + repeated_visitor(cur_count, cur_value); + } + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::visitNullableValues( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + LazyNullMap & null_map, + IndividualVisitor && individual_visitor, + RepeatedVisitor && repeated_visitor) +{ + while (num_values) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + if (cur_packed_bit_values[i] == max_def_level) + { + individual_visitor(cursor); + } + else + { + null_map.setNull(cursor); + } + cursor++; + } + } + else + { + if (cur_value == max_def_level) + { + repeated_visitor(cursor, cur_count); + } + else + { + null_map.setNull(cursor, cur_count); + } + cursor += cur_count; + } + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::visitNullableBySteps( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + IndividualNullVisitor && individual_null_visitor, + SteppedValidVisitor && stepped_valid_visitor, + RepeatedVisitor && repeated_visitor) +{ + // refer to: + // RleDecoder::GetBatch in rle_encoding.h and TypedColumnReaderImpl::ReadBatchSpaced in column_reader.cc + // VectorizedRleValuesReader::readBatchInternal in Spark + while 
(num_values > 0) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + valid_index_steps.resize(cur_count + 1); + valid_index_steps[0] = 0; + auto step_idx = 0; + auto null_map_cursor = cursor; + + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + if (cur_packed_bit_values[i] == max_def_level) + { + valid_index_steps[++step_idx] = 1; + } + else + { + individual_null_visitor(null_map_cursor); + if (unlikely(valid_index_steps[step_idx] == UINT8_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported packed values number"); + } + valid_index_steps[step_idx]++; + } + null_map_cursor++; + } + valid_index_steps.resize(step_idx + 1); + stepped_valid_visitor(cursor, valid_index_steps); + } + else + { + repeated_visitor(cur_value == max_def_level, cursor, cur_count); + } + + cursor += cur_count; + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::setValues(TValue * res_values, UInt32 num_values, ValueGetter && val_getter) +{ + visitValues( + num_values, + /* individual_visitor */ [&](Int32 val) + { + *(res_values++) = val_getter(val); + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + std::fill(res_values, res_values + count, val_getter(val)); + res_values += count; + } + ); +} + +template +void RleValuesReader::setValueBySteps( + TValue * res_values, + const std::vector & col_data_steps, + ValueGetter && val_getter) +{ + auto step_iterator = col_data_steps.begin(); + res_values += *(step_iterator++); + + visitValues( + col_data_steps.size() - 1, + /* individual_visitor */ [&](Int32 val) + { + *res_values = val_getter(val); + res_values += *(step_iterator++); + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + auto getted_val = val_getter(val); + for (UInt32 i = 0; i < count; i++) + { + *res_values = getted_val; + res_values += *(step_iterator++); + } + } + ); +} + + +namespace +{ + +template +TValue * getResizedPrimitiveData(TColumn & column, size_t size) +{ + auto old_size = column.size(); + column.getData().resize(size); + memset(column.getData().data() + old_size, 0, sizeof(TValue) * (size - old_size)); + return column.getData().data(); +} + +} // anoynomous namespace + + +template <> +void ParquetPlainValuesReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto & column = *assert_cast(col_ptr.get()); + auto cursor = column.size(); + + column.getOffsets().resize(cursor + num_values); + auto * offset_data = column.getOffsets().data(); + auto & chars = column.getChars(); + + def_level_reader->visitValues( + num_values, + /* individual_visitor */ [&](Int32 val) + { + if (val == max_def_level) + { + plain_data_buffer.readString(column, cursor); + } + else + { + chars.push_back(0); + offset_data[cursor] = chars.size(); + null_map.setNull(cursor); + } + cursor++; + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + if (val == max_def_level) + { + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readString(column, cursor); + cursor++; + } + } + else + { + null_map.setNull(cursor, count); + + auto chars_size_bak = chars.size(); + chars.resize(chars_size_bak + count); + memset(&chars[chars_size_bak], 0, count); + + auto idx = cursor; + cursor += count; + // the type of offset_data is PaddedPODArray, which makes sure that the -1 index is avaible + for (auto val_offset = offset_data[idx - 1]; idx < cursor; idx++) + { + offset_data[idx] = ++val_offset; + } + 
} + } + ); +} + + +template <> +void ParquetPlainValuesReader>::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData( + *assert_cast *>(col_ptr.get()), cursor + num_values); + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readDateTime64(column_data[nest_cursor]); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + auto col_data_pos = column_data + nest_cursor; + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readDateTime64(col_data_pos[i]); + } + } + ); +} + +template +void ParquetPlainValuesReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(col_ptr.get()), cursor + num_values); + using TValue = std::decay_t; + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readValue(column_data[nest_cursor]); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + plain_data_buffer.readBytes(column_data + nest_cursor, count * sizeof(TValue)); + } + ); +} + + +template +void ParquetFixedLenPlainReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + if constexpr (std::same_as> || std::same_as>) + { + readOverBigDecimal(col_ptr, null_map, num_values); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported type"); + } +} + +template +void ParquetFixedLenPlainReader::readOverBigDecimal( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData( + *assert_cast(col_ptr.get()), cursor + num_values); + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readOverBigDecimal(column_data + nest_cursor, elem_bytes_num); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + auto col_data_pos = column_data + nest_cursor; + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readOverBigDecimal(col_data_pos + i, elem_bytes_num); + } + } + ); +} + + +template +void ParquetRleLCReader::readBatch( + MutableColumnPtr & index_col, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = index_col->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(index_col.get()), cursor + num_values); + + bool has_null = false; + + // in ColumnLowCardinality, first element in dictionary is null + // so we should increase each value by 1 in parquet index + auto val_getter = [&](Int32 val) { return val + 1; }; + + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](UInt32 nest_cursor) { + column_data[nest_cursor] = 0; + has_null = true; + }, + /* stepped_valid_visitor */ [&](UInt32 nest_cursor, const std::vector & valid_index_steps) { + rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); + }, + /* repeated_visitor */ [&](bool is_valid, UInt32 nest_cursor, UInt32 count) { + if (is_valid) + { + rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); + } + else + { + auto 
data_pos = column_data + nest_cursor; + std::fill(data_pos, data_pos + count, 0); + has_null = true; + } + } + ); + if (has_null) + { + null_map.setNull(0); + } +} + +template <> +void ParquetRleDictReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto & column = *assert_cast(col_ptr.get()); + auto cursor = column.size(); + std::vector value_cache; + + const auto & dict_chars = static_cast(page_dictionary).getChars(); + const auto & dict_offsets = static_cast(page_dictionary).getOffsets(); + + column.getOffsets().resize(cursor + num_values); + auto * offset_data = column.getOffsets().data(); + auto & chars = column.getChars(); + + auto append_nulls = [&](UInt8 num) { + for (auto limit = cursor + num; cursor < limit; cursor++) + { + chars.push_back(0); + offset_data[cursor] = chars.size(); + null_map.setNull(cursor); + } + }; + + auto append_string = [&](Int32 dict_idx) { + auto dict_chars_cursor = dict_offsets[dict_idx - 1]; + auto value_len = dict_offsets[dict_idx] - dict_chars_cursor; + auto chars_cursor = chars.size(); + chars.resize(chars_cursor + value_len); + + memcpySmallAllowReadWriteOverflow15(&chars[chars_cursor], &dict_chars[dict_chars_cursor], value_len); + offset_data[cursor] = chars.size(); + cursor++; + }; + + auto val_getter = [&](Int32 val) { return val + 1; }; + + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](UInt32) {}, + /* stepped_valid_visitor */ [&](UInt32, const std::vector & valid_index_steps) { + value_cache.resize(valid_index_steps.size()); + rle_data_reader->setValues(value_cache.data() + 1, valid_index_steps.size() - 1, val_getter); + + append_nulls(valid_index_steps[0]); + for (size_t i = 1; i < valid_index_steps.size(); i++) + { + append_string(value_cache[i]); + append_nulls(valid_index_steps[i] - 1); + } + }, + /* repeated_visitor */ [&](bool is_valid, UInt32, UInt32 count) { + if (is_valid) + { + value_cache.resize(count); + rle_data_reader->setValues(value_cache.data(), count, val_getter); + for (UInt32 i = 0; i < count; i++) + { + append_string(value_cache[i]); + } + } + else + { + append_nulls(count); + } + } + ); +} + +template +void ParquetRleDictReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(col_ptr.get()), cursor + num_values); + const auto & dictionary_array = static_cast(page_dictionary).getData(); + + auto val_getter = [&](Int32 val) { return dictionary_array[val]; }; + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](UInt32 nest_cursor) { + null_map.setNull(nest_cursor); + }, + /* stepped_valid_visitor */ [&](UInt32 nest_cursor, const std::vector & valid_index_steps) { + rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); + }, + /* repeated_visitor */ [&](bool is_valid, UInt32 nest_cursor, UInt32 count) { + if (is_valid) + { + rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); + } + else + { + null_map.setNull(nest_cursor, count); + } + } + ); +} + + +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader; + +template class 
ParquetFixedLenPlainReader<ColumnDecimal<Decimal128>>;
+template class ParquetFixedLenPlainReader<ColumnDecimal<Decimal256>>;
+
+template class ParquetRleLCReader<ColumnUInt8>;
+template class ParquetRleLCReader<ColumnUInt16>;
+template class ParquetRleLCReader<ColumnUInt32>;
+
+template class ParquetRleDictReader<ColumnInt32>;
+template class ParquetRleDictReader<ColumnInt64>;
+template class ParquetRleDictReader<ColumnFloat32>;
+template class ParquetRleDictReader<ColumnFloat64>;
+template class ParquetRleDictReader<ColumnDecimal<Decimal32>>;
+template class ParquetRleDictReader<ColumnDecimal<Decimal64>>;
+template class ParquetRleDictReader<ColumnDecimal<Decimal128>>;
+template class ParquetRleDictReader<ColumnDecimal<Decimal256>>;
+template class ParquetRleDictReader<ColumnDecimal<DateTime64>>;
+template class ParquetRleDictReader<ColumnString>;
+
+}
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h
new file mode 100644
index 00000000000..2c95f495339
--- /dev/null
+++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h
@@ -0,0 +1,263 @@
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ParquetDataBuffer.h"
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+    extern const int PARQUET_EXCEPTION;
+}
+
+class RleValuesReader
+{
+public:
+    RleValuesReader(std::unique_ptr<arrow::BitUtil::BitReader> bit_reader_, Int32 bit_width_)
+        : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) {}
+
+    /**
+     * @brief Used when the bit_width is 0, so all elements have the same value.
+     */
+    RleValuesReader(UInt32 total_size, Int32 val = 0)
+        : bit_reader(nullptr), bit_width(0), cur_group_size(total_size), cur_value(val), cur_group_is_packed(false)
+    {}
+
+    void nextGroup();
+
+    void nextGroupIfNecessary() { if (cur_group_cursor >= cur_group_size) nextGroup(); }
+
+    UInt32 curGroupLeft() const { return cur_group_size - cur_group_cursor; }
+
+    /**
+     * @brief Visit num_values elements.
+     * For RLE encoding, the values within one group are all the same, so they can be visited repeatedly.
+     * For BitPacked encoding, the values may differ from each other, so they must be visited individually.
+     *
+     * @tparam IndividualVisitor A callback with signature: void(Int32 val)
+     * @tparam RepeatedVisitor A callback with signature: void(UInt32 count, Int32 val)
+     */
+    template <typename IndividualVisitor, typename RepeatedVisitor>
+    void visitValues(UInt32 num_values, IndividualVisitor && individual_visitor, RepeatedVisitor && repeated_visitor);
+
+    /**
+     * @brief Visit num_values elements by parsed nullability.
+     * An element whose parsed definition level equals max_def_level is valid; all others are treated as nulls.
+     *
+     * @tparam IndividualVisitor A callback with signature: void(size_t cursor)
+     * @tparam RepeatedVisitor A callback with signature: void(size_t cursor, UInt32 count)
+     *
+     * Because the null map is filled here, the callbacks only need to process the valid data.
+     */
+    template <typename IndividualVisitor, typename RepeatedVisitor>
+    void visitNullableValues(
+        size_t cursor,
+        UInt32 num_values,
+        Int32 max_def_level,
+        LazyNullMap & null_map,
+        IndividualVisitor && individual_visitor,
+        RepeatedVisitor && repeated_visitor);
+
+    /**
+     * @brief Visit num_values elements by parsed nullability.
+     * It may be inefficient to process the valid data individually as in visitNullableValues,
+     * so a valid_index_steps index array is generated first, in order to process valid data continuously.
+     *
+     * @tparam IndividualNullVisitor A callback with signature: void(size_t cursor), used to process a null value
+     * @tparam SteppedValidVisitor A callback with signature:
+     *  void(size_t cursor, const std::vector<UInt8> & valid_index_steps)
+     *  for n valid elements with null values interleaved in a BitPacked group,
+     *  the i-th item of valid_index_steps describes how many column elements there are after the (i-1)-th valid element.
+     *
+     *  Take the following BitPacked group with 2 valid elements as an example:
+     *      null valid null null valid null
+     *  then valid_index_steps has the values [1, 3, 2].
+     *  Please note that the sum of valid_index_steps equals the number of elements in this group.
+     *
+     * @tparam RepeatedVisitor A callback with signature: void(bool is_valid, UInt32 cursor, UInt32 count)
+     */
+    template <typename IndividualNullVisitor, typename SteppedValidVisitor, typename RepeatedVisitor>
+    void visitNullableBySteps(
+        size_t cursor,
+        UInt32 num_values,
+        Int32 max_def_level,
+        IndividualNullVisitor && null_visitor,
+        SteppedValidVisitor && stepped_valid_visitor,
+        RepeatedVisitor && repeated_visitor);
+
+    /**
+     * @brief Set the values to column_data directly.
+     *
+     * @tparam TValue The type of column data.
+     * @tparam ValueGetter A callback with signature: TValue(Int32 val)
+     */
+    template <typename TValue, typename ValueGetter>
+    void setValues(TValue * column_data, UInt32 num_values, ValueGetter && val_getter);
+
+    /**
+     * @brief Set the values by the valid_index_steps generated in visitNullableBySteps.
+     * According to visitNullableBySteps, the number of elements is valid_index_steps.size()-1,
+     * so valid_index_steps.size()-1 elements are read and written to column_data with the steps in valid_index_steps.
+     */
+    template <typename TValue, typename ValueGetter>
+    void setValueBySteps(
+        TValue * column_data,
+        const std::vector<UInt8> & col_data_steps,
+        ValueGetter && val_getter);
+
+private:
+    std::unique_ptr<arrow::BitUtil::BitReader> bit_reader;
+
+    std::vector<Int32> cur_packed_bit_values;
+    std::vector<UInt8> valid_index_steps;
+
+    Int32 bit_width;
+
+    UInt32 cur_group_size = 0;
+    UInt32 cur_group_cursor = 0;
+    Int32 cur_value;
+    bool cur_group_is_packed;
+};
+
+using RleValuesReaderPtr = std::unique_ptr<RleValuesReader>;
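For intuition, here is a minimal standalone sketch of the RLE/bit-packed hybrid format that nextGroup() and the visitors above walk: each group starts with a varint whose low bit selects bit-packing and whose remaining bits give the run length, in units of 8 values for packed groups. All names and the 1-bit fixture below are illustrative only; the patch itself delegates this to arrow's BitReader.

```cpp
// Simplified hybrid decoder for bit_width = 1 (the common case for definition
// levels of a flat nullable column). Compile with: g++ -std=c++20 rle_sketch.cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Reads one unsigned LEB128 varint (what BitReader::GetVlqInt does).
static uint64_t readVarint(const uint8_t *& pos)
{
    uint64_t result = 0;
    int shift = 0;
    while (true)
    {
        uint8_t byte = *pos++;
        result |= static_cast<uint64_t>(byte & 0x7F) << shift;
        if (!(byte & 0x80))
            return result;
        shift += 7;
    }
}

static void decodeHybrid(const uint8_t * pos, size_t num_values, std::vector<int> & out)
{
    while (out.size() < num_values)
    {
        uint64_t indicator = readVarint(pos);
        bool is_packed = indicator & 1;   // low bit selects the group kind
        uint64_t count = indicator >> 1;  // remaining bits are the run length
        if (is_packed)
        {
            // count is in groups of 8 values, one byte per group at bit_width = 1
            for (uint64_t byte_idx = 0; byte_idx < count; ++byte_idx)
            {
                uint8_t byte = *pos++;
                for (int bit = 0; bit < 8 && out.size() < num_values; ++bit)
                    out.push_back((byte >> bit) & 1);  // packed values are LSB-first
            }
        }
        else
        {
            // RLE run: one value stored in ceil(bit_width / 8) = 1 byte
            int value = *pos++;
            for (uint64_t i = 0; i < count && out.size() < num_values; ++i)
                out.push_back(value);
        }
    }
}

int main()
{
    // 0x08 -> indicator 8: packed flag 0, run of 4 copies of the next byte (1)
    // 0x03 -> indicator 3: packed flag 1, 1 group of 8 packed bits
    const uint8_t stream[] = {0x08, 0x01, 0x03, 0b01010110};
    std::vector<int> levels;
    decodeHybrid(stream, 12, levels);
    for (int v : levels)
        std::cout << v << ' ';  // 1 1 1 1 0 1 1 0 1 0 1 0
    std::cout << '\n';
}
```

This is also why nextGroup() multiplies the packed group size by 8 and reads (bit_width + 7) / 8 aligned bytes for an RLE run.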
+
+
+class ParquetDataValuesReader
+{
+public:
+    virtual void readBatch(MutableColumnPtr & column, LazyNullMap & null_map, UInt32 num_values) = 0;
+
+    virtual ~ParquetDataValuesReader() = default;
+};
+
+using ParquetDataValuesReaderPtr = std::unique_ptr<ParquetDataValuesReader>;
+
+
+/**
+ * The definition levels are RLE or BitPacked encoded, while the data is read directly (plain encoding).
+ */
+template <typename TColumn>
+class ParquetPlainValuesReader : public ParquetDataValuesReader
+{
+public:
+
+    ParquetPlainValuesReader(
+        Int32 max_def_level_,
+        std::unique_ptr<RleValuesReader> def_level_reader_,
+        ParquetDataBuffer data_buffer_)
+        : max_def_level(max_def_level_)
+        , def_level_reader(std::move(def_level_reader_))
+        , plain_data_buffer(std::move(data_buffer_))
+    {}
+
+    void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override;
+
+private:
+    Int32 max_def_level;
+    std::unique_ptr<RleValuesReader> def_level_reader;
+    ParquetDataBuffer plain_data_buffer;
+};
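The readers below all rely on the valid_index_steps contract documented in RleValuesReader above. A concrete walk-through makes it easier to follow; the helper below is the editor's own illustration, not patch code:

```cpp
// Builds valid_index_steps from definition levels, then scatters decoded
// values the way RleValuesReader::setValueBySteps does. The overflow guard
// of the real code (UINT8_MAX check) is omitted for brevity.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<uint8_t> buildValidIndexSteps(const std::vector<int> & def_levels, int max_def_level)
{
    std::vector<uint8_t> steps{0};     // steps[0]: nulls before the first valid element
    for (int level : def_levels)
    {
        if (level == max_def_level)
            steps.push_back(1);        // a valid element occupies one column slot
        else
            ++steps.back();            // a null extends the distance to the next valid element
    }
    return steps;
}

int main()
{
    // group: null valid null null valid null  (max_def_level = 1)
    auto steps = buildValidIndexSteps({0, 1, 0, 0, 1, 0}, 1);
    assert((steps == std::vector<uint8_t>{1, 3, 2}));

    // scatter two decoded values into a zero-filled column, nulls stay zero
    std::vector<int64_t> column(6, 0);
    std::vector<int64_t> decoded{42, 43};
    int64_t * res = column.data() + steps[0];
    for (size_t i = 0; i < decoded.size(); ++i)
    {
        *res = decoded[i];
        res += steps[i + 1];
    }
    assert((column == std::vector<int64_t>{0, 42, 0, 0, 43, 0}));
}
```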
+
+/**
+ * The data and the definition levels are encoded as in ParquetPlainValuesReader,
+ * but the element size is constant and larger than that of a primitive data type.
+ */
+template <typename TColumn>
+class ParquetFixedLenPlainReader : public ParquetDataValuesReader
+{
+public:
+
+    ParquetFixedLenPlainReader(
+        Int32 max_def_level_,
+        Int32 elem_bytes_num_,
+        std::unique_ptr<RleValuesReader> def_level_reader_,
+        ParquetDataBuffer data_buffer_)
+        : max_def_level(max_def_level_)
+        , elem_bytes_num(elem_bytes_num_)
+        , def_level_reader(std::move(def_level_reader_))
+        , plain_data_buffer(std::move(data_buffer_))
+    {}
+
+    void readOverBigDecimal(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values);
+
+    void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override;
+
+private:
+    Int32 max_def_level;
+    Int32 elem_bytes_num;
+    std::unique_ptr<RleValuesReader> def_level_reader;
+    ParquetDataBuffer plain_data_buffer;
+};
+
+/**
+ * Reads data laid out in the ColumnLowCardinality format.
+ *
+ * Only the index and null columns are processed in this class.
+ * All null values are mapped to the first index in the dictionary,
+ * so each resulting index value is increased by one.
+ */
+template <typename TColumnVector>
+class ParquetRleLCReader : public ParquetDataValuesReader
+{
+public:
+    ParquetRleLCReader(
+        Int32 max_def_level_,
+        std::unique_ptr<RleValuesReader> def_level_reader_,
+        std::unique_ptr<RleValuesReader> rle_data_reader_)
+        : max_def_level(max_def_level_)
+        , def_level_reader(std::move(def_level_reader_))
+        , rle_data_reader(std::move(rle_data_reader_))
+    {}
+
+    void readBatch(MutableColumnPtr & index_col, LazyNullMap & null_map, UInt32 num_values) override;
+
+private:
+    Int32 max_def_level;
+    std::unique_ptr<RleValuesReader> def_level_reader;
+    std::unique_ptr<RleValuesReader> rle_data_reader;
+};
+
+/**
+ * The definition levels are RLE or BitPacked encoded,
+ * and the dictionary indexes are also RLE or BitPacked encoded.
+ *
+ * The result is not produced as a low cardinality column;
+ * a normal column is generated instead.
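+ *
+ * An illustrative walk-through (editor's example, not from the original patch):
+ * given a dictionary page ["aa", "bb"] (read with an empty null placeholder at
+ * index 0, i.e. ["", "aa", "bb"]), definition levels [1, 0, 1, 1] and encoded
+ * dictionary indexes [0, 1, 0], the reader emits ["aa", NULL, "bb", "aa"].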
+ */ +template +class ParquetRleDictReader : public ParquetDataValuesReader +{ +public: + ParquetRleDictReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + std::unique_ptr rle_data_reader_, + const IColumn & page_dictionary_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , rle_data_reader(std::move(rle_data_reader_)) + , page_dictionary(page_dictionary_) + {} + + void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + std::unique_ptr rle_data_reader; + const IColumn & page_dictionary; +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp new file mode 100644 index 00000000000..00dee9074fe --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -0,0 +1,506 @@ +#include "ParquetLeafColReader.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +namespace +{ + +template +void visitColStrIndexType(size_t data_size, TypeVisitor && visitor) +{ + // refer to: DataTypeLowCardinality::createColumnUniqueImpl + if (data_size < (1ull << 8)) + { + visitor(static_cast(nullptr)); + } + else if (data_size < (1ull << 16)) + { + visitor(static_cast(nullptr)); + } + else if (data_size < (1ull << 32)) + { + visitor(static_cast(nullptr)); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported data size {}", data_size); + } +} + +void reserveColumnStrRows(MutableColumnPtr & col, UInt32 rows_num) +{ + col->reserve(rows_num); + + /// Never reserve for too big size according to SerializationString::deserializeBinaryBulk + if (rows_num < 256 * 1024 * 1024) + { + try + { + static_cast(col.get())->getChars().reserve(rows_num); + } + catch (Exception & e) + { + e.addMessage("(limit = " + toString(rows_num) + ")"); + throw; + } + } +}; + + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */); + +template <> +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & /* data_type */) +{ + auto col = ColumnString::create(); + col->getOffsets().resize(page.num_values() + 1); + col->getChars().reserve(page.num_values()); + ParquetDataBuffer buffer(page.data(), page.size()); + + // will be read as low cardinality column + // in which case, the null key is set to first position, so the first string should be empty + col->getChars().push_back(0); + col->getOffsets()[0] = 1; + for (auto i = 1; i <= page.num_values(); i++) + { + buffer.readString(*col, i); + } + return col; +} + +template <> +ColumnPtr readDictPage>( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & data_type) +{ + auto & datetime_type = assert_cast(*data_type); + auto dict_col = ColumnDecimal::create(page.num_values(), datetime_type.getScale()); + auto * col_data = dict_col->getData().data(); + ParquetDataBuffer buffer(page.data(), page.size(), datetime_type.getScale()); + for (auto i = 0; i < 
page.num_values(); i++) + { + buffer.readDateTime64(col_data[i]); + } + return dict_col; +} + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnDecimal::create(page.num_values(), col_des.type_scale()); + auto * col_data = dict_col->getData().data(); + ParquetDataBuffer buffer(page.data(), page.size()); + for (auto i = 0; i < page.num_values(); i++) + { + buffer.readOverBigDecimal(col_data + i, col_des.type_length()); + } + return dict_col; +} + +template requires (!std::is_same_v) +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnDecimal::create(page.num_values(), col_des.type_scale()); + ParquetDataBuffer buffer(page.data(), page.size()); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(typename TColumnDecimal::ValueType)); + return dict_col; +} + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnVector::create(page.num_values()); + ParquetDataBuffer buffer(page.data(), page.size()); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(typename TColumnVector::ValueType)); + return dict_col; +} + + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer); + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer) +{ + return std::make_unique>( + col_des.max_definition_level(), + col_des.type_length(), + std::move(def_level_reader), + std::move(buffer)); +} + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer) +{ + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); +} + + +} // anonymous namespace + + +template +ParquetLeafColReader::ParquetLeafColReader( + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr base_type_, + std::unique_ptr meta_, + std::unique_ptr reader_) + : col_descriptor(col_descriptor_) + , base_data_type(base_type_) + , col_chunk_meta(std::move(meta_)) + , parquet_page_reader(std::move(reader_)) + , log(&Poco::Logger::get("ParquetLeafColReader")) +{ +} + +template +ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt32 rows_num, const String & name) +{ + reading_rows_num = rows_num; + auto readPageIfEmpty = [&]() { + while (!cur_page_values) readPage(); + }; + + // make sure the dict page has been read, and the status is updated + readPageIfEmpty(); + resetColumn(rows_num); + + while (rows_num) + { + // if dictionary page encountered, another page should be read + readPageIfEmpty(); + + auto read_values = std::min(rows_num, cur_page_values); + data_values_reader->readBatch(column, *null_map, read_values); + + cur_page_values -= read_values; + rows_num -= read_values; + } + + return releaseColumn(name); +} + +template <> +void ParquetLeafColReader::resetColumn(UInt32 rows_num) +{ + if (reading_low_cardinality) + { + assert(dictionary); + visitColStrIndexType(dictionary->size(), [&](TColVec *) { + column = TColVec::create(); + }); + + // only first 
position is used
+        null_map = std::make_unique<LazyNullMap>(1);
+        column->reserve(rows_num);
+    }
+    else
+    {
+        null_map = std::make_unique<LazyNullMap>(rows_num);
+        column = ColumnString::create();
+        reserveColumnStrRows(column, rows_num);
+    }
+}
+
+template <typename TColumn>
+void ParquetLeafColReader<TColumn>::resetColumn(UInt32 rows_num)
+{
+    assert(!reading_low_cardinality);
+
+    column = base_data_type->createColumn();
+    column->reserve(rows_num);
+    null_map = std::make_unique<LazyNullMap>(rows_num);
+}
+
+template <typename TColumn>
+void ParquetLeafColReader<TColumn>::degradeDictionary()
+{
+    assert(dictionary && column->size());
+    null_map = std::make_unique<LazyNullMap>(reading_rows_num);
+    auto col_existing = std::move(column);
+    column = ColumnString::create();
+
+    ColumnString & col_dest = *static_cast<ColumnString *>(column.get());
+    const ColumnString & col_dict_str = *static_cast<const ColumnString *>(dictionary.get());
+
+    visitColStrIndexType(dictionary->size(), [&]<typename TColVec>(TColVec *)
+    {
+        const TColVec & col_src = *static_cast<const TColVec *>(col_existing.get());
+        reserveColumnStrRows(column, reading_rows_num);
+
+        col_dest.getOffsets().resize(col_src.size());
+        for (size_t i = 0; i < col_src.size(); i++)
+        {
+            auto src_idx = col_src.getData()[i];
+            if (0 == src_idx)
+            {
+                null_map->setNull(i);
+            }
+            auto dict_chars_cursor = col_dict_str.getOffsets()[src_idx - 1];
+            auto str_len = col_dict_str.getOffsets()[src_idx] - dict_chars_cursor;
+            auto dst_chars_cursor = col_dest.getChars().size();
+            col_dest.getChars().resize(dst_chars_cursor + str_len);
+
+            memcpySmallAllowReadWriteOverflow15(
+                &col_dest.getChars()[dst_chars_cursor], &col_dict_str.getChars()[dict_chars_cursor], str_len);
+            col_dest.getOffsets()[i] = col_dest.getChars().size();
+        }
+    });
+    LOG_INFO(log, "degraded dictionary to normal column");
+}
+
+template <typename TColumn>
+ColumnWithTypeAndName ParquetLeafColReader<TColumn>::releaseColumn(const String & name)
+{
+    DataTypePtr data_type = base_data_type;
+    if (reading_low_cardinality)
+    {
+        MutableColumnPtr col_unique;
+        if (null_map->getNullableCol())
+        {
+            data_type = std::make_shared<DataTypeNullable>(data_type);
+            col_unique = ColumnUnique<TColumn>::create(dictionary->assumeMutable(), true);
+        }
+        else
+        {
+            col_unique = ColumnUnique<TColumn>::create(dictionary->assumeMutable(), false);
+        }
+        column = ColumnLowCardinality::create(std::move(col_unique), std::move(column), true);
+        data_type = std::make_shared<DataTypeLowCardinality>(data_type);
+    }
+    else
+    {
+        if (null_map->getNullableCol())
+        {
+            column = ColumnNullable::create(std::move(column), null_map->getNullableCol()->assumeMutable());
+            data_type = std::make_shared<DataTypeNullable>(data_type);
+        }
+    }
+    ColumnWithTypeAndName res = {std::move(column), data_type, name};
+    column = nullptr;
+    null_map = nullptr;
+
+    return res;
+}
+
+template <typename TColumn>
+void ParquetLeafColReader<TColumn>::readPage()
+{
+    // refer to: ColumnReaderImplBase::ReadNewPage in column_reader.cc
+    auto cur_page = parquet_page_reader->NextPage();
+    switch (cur_page->type())
+    {
+        case parquet::PageType::DATA_PAGE:
+            readPageV1(*std::static_pointer_cast<parquet::DataPageV1>(cur_page));
+            break;
+        case parquet::PageType::DATA_PAGE_V2:
+            readPageV2(*std::static_pointer_cast<parquet::DataPageV2>(cur_page));
+            break;
+        case parquet::PageType::DICTIONARY_PAGE:
+        {
+            const parquet::DictionaryPage & dict_page = *std::static_pointer_cast<parquet::DictionaryPage>(cur_page);
+            if (unlikely(
+                dict_page.encoding() != parquet::Encoding::PLAIN_DICTIONARY
+                && dict_page.encoding() != parquet::Encoding::PLAIN))
+            {
+                throw Exception(
+                    ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary page encoding {}", dict_page.encoding());
+            }
+            LOG_INFO(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name());
+
+            dictionary = readDictPage<TColumn>(dict_page, col_descriptor, base_data_type);
+            if (std::is_same_v<TColumn, ColumnString>)
+            {
+                reading_low_cardinality = true;
+            }
+            break;
+        }
+        default:
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported page type: {}", cur_page->type());
+    }
+}
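readPageV1 below carves a v1 data page into [repetition levels][definition levels][plain values]; when max_repetition_level is 0 the buffer starts directly at the 4-byte length prefix of the RLE-encoded definition levels. A toy illustration of that slicing (made-up bytes, little endian as the patch itself asserts):

```cpp
// Editor's sketch of the DATA_PAGE (v1) layout, for a column with
// max_repetition_level == 0 and max_definition_level == 1. The real work is
// done by arrow's BitReader; only the pointer arithmetic is shown here.
#include <cstdint>
#include <cstring>
#include <iostream>

int main()
{
    const uint8_t page[] = {
        0x02, 0x00, 0x00, 0x00,  // definition levels take 2 bytes
        0x08, 0x01,              // RLE run: 4 values, all def level 1 (see the hybrid sketch earlier)
        0x2A, 0x00, 0x00, 0x00,  // first plain-encoded INT32 value: 42
    };
    const uint8_t * buffer = page;

    uint32_t def_bytes;
    std::memcpy(&def_bytes, buffer, 4);   // SafeLoadAs<Int32> in the patch
    buffer += 4 + def_bytes;              // skip past the definition levels

    int32_t first_value;
    std::memcpy(&first_value, buffer, 4); // plain-encoded data starts here
    std::cout << first_value << '\n';     // 42
}
```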
+
+template <typename TColumn>
+void ParquetLeafColReader<TColumn>::readPageV1(const parquet::DataPageV1 & page)
+{
+    static parquet::LevelDecoder repetition_level_decoder;
+
+    cur_page_values = page.num_values();
+
+    // refer to: VectorizedColumnReader::readPageV1 in Spark and LevelDecoder::SetData in column_reader.cc
+    if (page.definition_level_encoding() != parquet::Encoding::RLE && col_descriptor.max_definition_level() != 0)
+    {
+        throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unsupported encoding: {}", page.definition_level_encoding());
+    }
+    const auto * buffer = page.data();
+    auto max_size = page.size();
+
+    if (col_descriptor.max_repetition_level() > 0)
+    {
+        auto rep_levels_bytes = repetition_level_decoder.SetData(
+            page.repetition_level_encoding(), col_descriptor.max_repetition_level(), 0, buffer, max_size);
+        buffer += rep_levels_bytes;
+        max_size -= rep_levels_bytes;
+    }
+
+    assert(col_descriptor.max_definition_level() >= 0);
+    std::unique_ptr<RleValuesReader> def_level_reader;
+    if (col_descriptor.max_definition_level() > 0) {
+        auto bit_width = arrow::BitUtil::Log2(col_descriptor.max_definition_level() + 1);
+        auto num_bytes = ::arrow::util::SafeLoadAs<Int32>(buffer);
+        auto bit_reader = std::make_unique<arrow::BitUtil::BitReader>(buffer + 4, num_bytes);
+        num_bytes += 4;
+        buffer += num_bytes;
+        max_size -= num_bytes;
+        def_level_reader = std::make_unique<RleValuesReader>(std::move(bit_reader), bit_width);
+    }
+    else
+    {
+        def_level_reader = std::make_unique<RleValuesReader>(page.num_values());
+    }
+
+    switch (page.encoding())
+    {
+        case parquet::Encoding::PLAIN:
+        {
+            if (reading_low_cardinality)
+            {
+                reading_low_cardinality = false;
+                degradeDictionary();
+            }
+
+            ParquetDataBuffer parquet_buffer = [&]()
+            {
+                if constexpr (!std::is_same_v<ColumnDecimal<DateTime64>, TColumn>)
+                    return ParquetDataBuffer(buffer, max_size);
+
+                auto scale = assert_cast<const DataTypeDateTime64 &>(*base_data_type).getScale();
+                return ParquetDataBuffer(buffer, max_size, scale);
+            }();
+            data_values_reader = createPlainReader<TColumn>(
+                col_descriptor, std::move(def_level_reader), std::move(parquet_buffer));
+            break;
+        }
+        case parquet::Encoding::RLE_DICTIONARY:
+        case parquet::Encoding::PLAIN_DICTIONARY:
+        {
+            if (unlikely(!dictionary))
+            {
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "dictionary is missing for a dictionary-encoded page");
+            }
+
+            // refer to: DictDecoderImpl::SetData in encoding.cc
+            auto bit_width = *buffer;
+            auto bit_reader = std::make_unique<arrow::BitUtil::BitReader>(++buffer, --max_size);
+            data_values_reader = createDictReader(
+                std::move(def_level_reader), std::make_unique<RleValuesReader>(std::move(bit_reader), bit_width));
+            break;
+        }
+        case parquet::Encoding::BYTE_STREAM_SPLIT:
+        case parquet::Encoding::DELTA_BINARY_PACKED:
+        case parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY:
+        case parquet::Encoding::DELTA_BYTE_ARRAY:
+            throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unsupported encoding: {}", page.encoding());
+
+        default:
+            throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unknown encoding type: {}", page.encoding());
+    }
+}
+
+template <typename TColumn>
+void ParquetLeafColReader<TColumn>::readPageV2(const parquet::DataPageV2 & /*page*/)
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "read page V2 is not implemented yet");
+}
+
+template <typename TColumn>
+std::unique_ptr<ParquetDataValuesReader> ParquetLeafColReader<TColumn>::createDictReader(
+    std::unique_ptr<RleValuesReader> def_level_reader, std::unique_ptr<RleValuesReader> rle_data_reader)
+{
+    if (reading_low_cardinality && std::same_as<TColumn, ColumnString>)
+    {
+        std::unique_ptr<ParquetDataValuesReader> res;
+        visitColStrIndexType(dictionary->size(), [&]<typename TCol>(TCol *)
+        {
+            res = 
std::make_unique>( + col_descriptor.max_definition_level(), + std::move(def_level_reader), + std::move(rle_data_reader)); + }); + return res; + } + return std::make_unique>( + col_descriptor.max_definition_level(), + std::move(def_level_reader), + std::move(rle_data_reader), + *assert_cast(dictionary.get())); +} + + +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h new file mode 100644 index 00000000000..f730afe40ed --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include + +#include "ParquetColumnReader.h" +#include "ParquetDataValuesReader.h" + +namespace parquet +{ + +class ColumnDescriptor; + +} + + +namespace DB +{ + +template +class ParquetLeafColReader : public ParquetColumnReader +{ +public: + ParquetLeafColReader( + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr base_type_, + std::unique_ptr meta_, + std::unique_ptr reader_); + + ColumnWithTypeAndName readBatch(UInt32 rows_num, const String & name) override; + +private: + const parquet::ColumnDescriptor & col_descriptor; + DataTypePtr base_data_type; + std::unique_ptr col_chunk_meta; + std::unique_ptr parquet_page_reader; + std::unique_ptr data_values_reader; + + MutableColumnPtr column; + std::unique_ptr null_map; + + ColumnPtr dictionary; + + UInt32 cur_page_values = 0; + UInt32 reading_rows_num = 0; + bool reading_low_cardinality = false; + + Poco::Logger * log; + + void resetColumn(UInt32 rows_num); + void degradeDictionary(); + ColumnWithTypeAndName releaseColumn(const String & name); + + void readPage(); + void readPageV1(const parquet::DataPageV1 & page); + void readPageV2(const parquet::DataPageV2 & page); + + std::unique_ptr createDictReader( + std::unique_ptr def_level_reader, std::unique_ptr rle_data_reader); +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp new file mode 100644 index 00000000000..a5744b85174 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -0,0 +1,225 @@ +#include "ParquetRecordReader.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ParquetLeafColReader.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +// #define THROW_ARROW_NOT_OK(status) \ +// do \ +// { \ +// if (::arrow::Status _s = (status); !_s.ok()) \ +// throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ +// } while (false) + + +#define THROW_PARQUET_EXCEPTION(s) \ + do \ + { \ + try { (s); } \ + catch (const ::parquet::ParquetException & e) \ + { \ + throw Exception(e.what(), ErrorCodes::PARQUET_EXCEPTION); \ + } \ + } while (false) + +namespace +{ + +Int64 getTotalRows(const parquet::FileMetaData & meta_data) +{ + Int64 res = 0; + for (int i = 0; i < meta_data.num_row_groups(); i++) + { + res += meta_data.RowGroup(i)->num_rows(); + } + 
return res;
+}
+
+std::unique_ptr<ParquetColumnReader> createReader(
+    const parquet::ColumnDescriptor & col_descriptor,
+    DataTypePtr ch_type,
+    std::unique_ptr<parquet::ColumnChunkMetaData> meta,
+    std::unique_ptr<parquet::PageReader> reader)
+{
+    if (col_descriptor.logical_type()->is_date() && parquet::Type::INT32 == col_descriptor.physical_type())
+    {
+        return std::make_unique<ParquetLeafColReader<ColumnInt32>>(
+            col_descriptor, std::make_shared<DataTypeDate32>(), std::move(meta), std::move(reader));
+    }
+    else if (col_descriptor.logical_type()->is_decimal())
+    {
+        switch (col_descriptor.physical_type())
+        {
+            case parquet::Type::INT32:
+            {
+                auto data_type = std::make_shared<DataTypeDecimal<Decimal32>>(
+                    col_descriptor.type_precision(), col_descriptor.type_scale());
+                return std::make_unique<ParquetLeafColReader<ColumnDecimal<Decimal32>>>(
+                    col_descriptor, data_type, std::move(meta), std::move(reader));
+            }
+            case parquet::Type::INT64:
+            {
+                auto data_type = std::make_shared<DataTypeDecimal<Decimal64>>(
+                    col_descriptor.type_precision(), col_descriptor.type_scale());
+                return std::make_unique<ParquetLeafColReader<ColumnDecimal<Decimal64>>>(
+                    col_descriptor, data_type, std::move(meta), std::move(reader));
+            }
+            case parquet::Type::FIXED_LEN_BYTE_ARRAY:
+            {
+                if (col_descriptor.type_length() <= static_cast<int>(DecimalUtils::max_precision<Decimal128>))
+                {
+                    auto data_type = std::make_shared<DataTypeDecimal<Decimal128>>(
+                        col_descriptor.type_precision(), col_descriptor.type_scale());
+                    return std::make_unique<ParquetLeafColReader<ColumnDecimal<Decimal128>>>(
+                        col_descriptor, data_type, std::move(meta), std::move(reader));
+                }
+                else
+                {
+                    auto data_type = std::make_shared<DataTypeDecimal<Decimal256>>(
+                        col_descriptor.type_precision(), col_descriptor.type_scale());
+                    return std::make_unique<ParquetLeafColReader<ColumnDecimal<Decimal256>>>(
+                        col_descriptor, data_type, std::move(meta), std::move(reader));
+                }
+            }
+            default:
+                throw Exception(
+                    ErrorCodes::PARQUET_EXCEPTION,
+                    "Type not supported for decimal: {}",
+                    col_descriptor.physical_type());
+        }
+    }
+    else
+    {
+        switch (col_descriptor.physical_type())
+        {
+            case parquet::Type::INT32:
+                return std::make_unique<ParquetLeafColReader<ColumnInt32>>(
+                    col_descriptor, std::make_shared<DataTypeInt32>(), std::move(meta), std::move(reader));
+            case parquet::Type::INT64:
+                return std::make_unique<ParquetLeafColReader<ColumnInt64>>(
+                    col_descriptor, std::make_shared<DataTypeInt64>(), std::move(meta), std::move(reader));
+            case parquet::Type::FLOAT:
+                return std::make_unique<ParquetLeafColReader<ColumnFloat32>>(
+                    col_descriptor, std::make_shared<DataTypeFloat32>(), std::move(meta), std::move(reader));
+            case parquet::Type::INT96:
+            {
+                DataTypePtr read_type = ch_type;
+                if (!isDateTime64(ch_type))
+                {
+                    read_type = std::make_shared<DataTypeDateTime64>(ParquetRecordReader::default_datetime64_scale);
+                }
+                return std::make_unique<ParquetLeafColReader<ColumnDecimal<DateTime64>>>(
+                    col_descriptor, read_type, std::move(meta), std::move(reader));
+            }
+            case parquet::Type::DOUBLE:
+                return std::make_unique<ParquetLeafColReader<ColumnFloat64>>(
+                    col_descriptor, std::make_shared<DataTypeFloat64>(), std::move(meta), std::move(reader));
+            case parquet::Type::BYTE_ARRAY:
+                return std::make_unique<ParquetLeafColReader<ColumnString>>(
+                    col_descriptor, std::make_shared<DataTypeString>(), std::move(meta), std::move(reader));
+            default:
+                throw Exception(
+                    ErrorCodes::PARQUET_EXCEPTION, "Type not supported: {}", col_descriptor.physical_type());
+        }
+    }
+}
+
+} // anonymous namespace
+
+ParquetRecordReader::ParquetRecordReader(
+    Block header_,
+    std::shared_ptr<::arrow::io::RandomAccessFile> file,
+    const parquet::ReaderProperties & properties)
+    : header(std::move(header_))
+{
+    // Only little endian systems are supported currently
+    static_assert(std::endian::native == std::endian::little);
+
+    log = &Poco::Logger::get("ParquetRecordReader");
+    THROW_PARQUET_EXCEPTION(file_reader = parquet::ParquetFileReader::Open(std::move(file), properties));
+    left_rows = getTotalRows(*file_reader->metadata());
+
+    parquet_col_indice.reserve(header.columns());
+    column_readers.reserve(header.columns());
+    for (const auto & col_with_name : header)
+    {
+        auto idx = file_reader->metadata()->schema()->ColumnIndex(col_with_name.name);
+        if (idx < 0)
+        {
+            throw Exception("cannot find column with name: " + col_with_name.name, ErrorCodes::BAD_ARGUMENTS);
+        }
+        parquet_col_indice.push_back(idx);
+    }
+}
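With the constructor above in place, the intended call pattern is: describe the wanted columns in a Block, open an arrow file, and drain the reader chunk by chunk. A schematic usage sketch follows; the path, chunk size, and column set are made up, and error handling is omitted:

```cpp
// Editor's illustration of driving ParquetRecordReader, not part of the patch.
#include <arrow/io/file.h>
#include <parquet/properties.h>
#include <Core/Block.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include "ParquetRecordReader.h"

using namespace DB;

void drainFile(const String & path)
{
    // Only the names and types of the header are used; the columns stay empty.
    Block header{
        ColumnWithTypeAndName(std::make_shared<DataTypeInt64>(), "id"),
        ColumnWithTypeAndName(std::make_shared<DataTypeString>(), "name"),
    };

    auto file = arrow::io::ReadableFile::Open(path).ValueOrDie();
    ParquetRecordReader reader(header, file, parquet::default_reader_properties());

    while (true)
    {
        Chunk chunk = reader.readChunk(8192); // rows per batch, capped by the current row group
        if (!chunk.hasRows())
            break;
        // each column in the chunk has already been cast to the header's type
    }
}
```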
+
+Chunk ParquetRecordReader::readChunk(UInt32 num_rows)
+{
+    if (!left_rows)
+    {
+        return Chunk{};
+    }
+    if (!cur_row_group_left_rows)
+    {
+        loadNextRowGroup();
+    }
+
+    Columns columns(header.columns());
+    auto num_rows_read = std::min(static_cast<UInt64>(num_rows), cur_row_group_left_rows);
+    for (size_t i = 0; i < header.columns(); i++)
+    {
+        columns[i] = castColumn(
+            column_readers[i]->readBatch(num_rows_read, header.getByPosition(i).name),
+            header.getByPosition(i).type);
+    }
+    left_rows -= num_rows_read;
+    cur_row_group_left_rows -= num_rows_read;
+
+    return Chunk{std::move(columns), num_rows_read};
+}
+
+void ParquetRecordReader::loadNextRowGroup()
+{
+    Stopwatch watch(CLOCK_MONOTONIC);
+    cur_row_group_reader = file_reader->RowGroup(next_row_group_idx);
+
+    column_readers.clear();
+    for (size_t i = 0; i < parquet_col_indice.size(); i++)
+    {
+        column_readers.emplace_back(createReader(
+            *file_reader->metadata()->schema()->Column(parquet_col_indice[i]),
+            header.getByPosition(i).type,
+            cur_row_group_reader->metadata()->ColumnChunk(parquet_col_indice[i]),
+            cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i])));
+    }
+    LOG_DEBUG(log, "reading row group {} consumed {} ms", next_row_group_idx, watch.elapsedNanoseconds() / 1e6);
+    ++next_row_group_idx;
+    cur_row_group_left_rows = cur_row_group_reader->metadata()->num_rows();
+}
+
+}
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h
new file mode 100644
index 00000000000..d77cab6553b
--- /dev/null
+++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "ParquetColumnReader.h"
+
+namespace DB
+{
+
+class ParquetRecordReader
+{
+public:
+    ParquetRecordReader(
+        Block header_,
+        std::shared_ptr<::arrow::io::RandomAccessFile> file,
+        const parquet::ReaderProperties & properties);
+
+    Chunk readChunk(UInt32 num_rows);
+
+    // follow the scale generated by spark
+    static constexpr UInt8 default_datetime64_scale = 9;
+
+private:
+    std::unique_ptr<parquet::ParquetFileReader> file_reader;
+
+    Block header;
+
+    std::shared_ptr<parquet::RowGroupReader> cur_row_group_reader;
+    ParquetColReaders column_readers;
+
+    std::vector<int> parquet_col_indice;
+    UInt64 left_rows;
+    UInt64 cur_row_group_left_rows = 0;
+    int next_row_group_idx = 0;
+
+    Poco::Logger * log;
+
+    void loadNextRowGroup();
+};
+
+}

From 8fb89cec9f28d6a12c2216ccd849fe0ead3ccd33 Mon Sep 17 00:00:00 2001
From: copperybean
Date: Sun, 14 Jan 2024 12:01:23 +0800
Subject: [PATCH 150/651] fix build

Change-Id: I57f025b17a04e2c5dded3f18e7f477841287a2c2
---
 base/base/Decimal_fwd.h                              |  4 ++++
 src/Columns/ColumnDecimal.h                          |  8 +++++++
 src/Columns/ColumnVector.h                           |  3 +++
 src/Common/ErrorCodes.cpp                            |  1 +
 .../Formats/Impl/Parquet/ParquetColumnReader.h       |  3 ++-
 .../Formats/Impl/Parquet/ParquetDataBuffer.h         | 12 ++++++----
 .../Formats/Impl/Parquet/ParquetDataValuesReader.cpp | 23 ++++++++++---------
 .../Formats/Impl/Parquet/ParquetDataValuesReader.h   | 23 +++++++++----------
 .../Formats/Impl/Parquet/ParquetLeafColReader.cpp    | 17 +++++++-------
 .../Formats/Impl/Parquet/ParquetLeafColReader.h      |  7 +++---
 .../Formats/Impl/Parquet/ParquetRecordReader.cpp     | 19 ++++++---------
 .../Formats/Impl/Parquet/ParquetRecordReader.h       |  7 +++---
 12 files changed, 71 insertions(+), 56 deletions(-)

diff 
--git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h index beb228cea3c..a11e13a479b 100644 --- a/base/base/Decimal_fwd.h +++ b/base/base/Decimal_fwd.h @@ -44,6 +44,10 @@ concept is_over_big_int = || std::is_same_v || std::is_same_v || std::is_same_v; + +template +concept is_over_big_decimal = is_decimal && is_over_big_int; + } template <> struct is_signed { static constexpr bool value = true; }; diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index e0ea26744dc..e606aaaff0f 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -141,6 +141,14 @@ protected: UInt32 scale; }; +template +concept is_col_over_big_decimal = std::is_same_v> + && is_decimal && is_over_big_int; + +template +concept is_col_int_decimal = std::is_same_v> + && is_decimal && std::is_integral_v; + template class ColumnVector; template struct ColumnVectorOrDecimalT { using Col = ColumnVector; }; template struct ColumnVectorOrDecimalT { using Col = ColumnDecimal; }; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 39ee1d931bd..91bceaa4534 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -441,6 +441,9 @@ ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_ return res; } +template +concept is_col_vector = std::is_same_v>; + /// Prevent implicit template instantiation of ColumnVector for common types extern template class ColumnVector; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 44c051401ef..106f443d532 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -600,6 +600,7 @@ M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \ M(720, USER_EXPIRED) \ M(721, DEPRECATED_FUNCTION) \ + M(722, PARQUET_EXCEPTION) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h index cfd9d3ba5bd..2c78949e8e1 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace parquet { @@ -18,7 +19,7 @@ namespace DB class ParquetColumnReader { public: - virtual ColumnWithTypeAndName readBatch(UInt32 rows_num, const String & name) = 0; + virtual ColumnWithTypeAndName readBatch(UInt64 rows_num, const String & name) = 0; virtual ~ParquetColumnReader() = default; }; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h index 1f83c74f9ad..be9710e1726 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -142,15 +142,19 @@ private: class LazyNullMap { public: - LazyNullMap(UInt32 size_) : size(size_), col_nullable(nullptr) {} + LazyNullMap(UInt64 size_) : size(size_), col_nullable(nullptr) {} - void setNull(UInt32 cursor) + template + requires std::is_integral_v + void setNull(T cursor) { initialize(); null_map[cursor] = 1; } - void setNull(UInt32 cursor, UInt32 count) + template + requires std::is_integral_v + void setNull(T cursor, UInt32 count) { initialize(); memset(null_map + cursor, 1, count); @@ -159,7 +163,7 @@ public: ColumnPtr getNullableCol() { return col_nullable; } private: - UInt32 size; + UInt64 size; UInt8 * null_map; ColumnPtr col_nullable; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp 
b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 659a7a11969..3afc66dcb36 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -189,7 +189,7 @@ void RleValuesReader::setValueBySteps( res_values += *(step_iterator++); visitValues( - col_data_steps.size() - 1, + static_cast(col_data_steps.size() - 1), /* individual_visitor */ [&](Int32 val) { *res_values = val_getter(val); @@ -394,14 +394,14 @@ void ParquetRleLCReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](UInt32 nest_cursor) { + /* individual_null_visitor */ [&](size_t nest_cursor) { column_data[nest_cursor] = 0; has_null = true; }, - /* stepped_valid_visitor */ [&](UInt32 nest_cursor, const std::vector & valid_index_steps) { + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) { rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); }, - /* repeated_visitor */ [&](bool is_valid, UInt32 nest_cursor, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) { if (is_valid) { rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); @@ -461,10 +461,11 @@ void ParquetRleDictReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](UInt32) {}, - /* stepped_valid_visitor */ [&](UInt32, const std::vector & valid_index_steps) { + /* individual_null_visitor */ [&](size_t) {}, + /* stepped_valid_visitor */ [&](size_t, const std::vector & valid_index_steps) { value_cache.resize(valid_index_steps.size()); - rle_data_reader->setValues(value_cache.data() + 1, valid_index_steps.size() - 1, val_getter); + rle_data_reader->setValues( + value_cache.data() + 1, static_cast(valid_index_steps.size() - 1), val_getter); append_nulls(valid_index_steps[0]); for (size_t i = 1; i < valid_index_steps.size(); i++) @@ -473,7 +474,7 @@ void ParquetRleDictReader::readBatch( append_nulls(valid_index_steps[i] - 1); } }, - /* repeated_visitor */ [&](bool is_valid, UInt32, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t, UInt32 count) { if (is_valid) { value_cache.resize(count); @@ -504,13 +505,13 @@ void ParquetRleDictReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](UInt32 nest_cursor) { + /* individual_null_visitor */ [&](size_t nest_cursor) { null_map.setNull(nest_cursor); }, - /* stepped_valid_visitor */ [&](UInt32 nest_cursor, const std::vector & valid_index_steps) { + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) { rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); }, - /* repeated_visitor */ [&](bool is_valid, UInt32 nest_cursor, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) { if (is_valid) { rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 2c95f495339..66a1f4877e4 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -25,7 +24,7 @@ namespace ErrorCodes class RleValuesReader { public: - RleValuesReader(std::unique_ptr bit_reader_, 
Int32 bit_width_) + RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_) : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) {} /** @@ -45,7 +44,7 @@ public: * @brief Visit num_values elements. * For RLE encoding, for same group, the value is same, so they can be visited repeatedly. * For BitPacked encoding, the values may be different with each other, so they must be visited individual. - * + * * @tparam IndividualVisitor A callback with signature: void(Int32 val) * @tparam RepeatedVisitor A callback with signature: void(UInt32 count, Int32 val) */ @@ -55,10 +54,10 @@ public: /** * @brief Visit num_values elements by parsed nullability. * If the parsed value is same as max_def_level, then it is processed as null value. - * + * * @tparam IndividualVisitor A callback with signature: void(size_t cursor) * @tparam RepeatedVisitor A callback with signature: void(size_t cursor, UInt32 count) - * + * * Because the null map is processed, so only the callbacks only need to process the valid data. */ template @@ -74,18 +73,18 @@ public: * @brief Visit num_values elements by parsed nullability. * It may be inefficient to process the valid data individually like in visitNullableValues, * so a valid_index_steps index array is generated first, in order to process valid data continuously. - * + * * @tparam IndividualNullVisitor A callback with signature: void(size_t cursor), used to process null value * @tparam SteppedValidVisitor A callback with signature: * void(size_t cursor, const std::vector & valid_index_steps) * for n valid elements with null value interleaved in a BitPacked group, * i-th item in valid_index_steps describes how many elements in column there are after (i-1)-th valid element. - * + * * take following BitPacked group with 2 valid elements for example: * null valid null null valid null * then the valid_index_steps has values [1, 3, 2]. * Please note that the the sum of valid_index_steps is same as elements number in this group. - * + * * @tparam RepeatedVisitor A callback with signature: void(bool is_valid, UInt32 cursor, UInt32 count) */ template @@ -99,7 +98,7 @@ public: /** * @brief Set the Values to column_data directly - * + * * @tparam TValue The type of column data. * @tparam ValueGetter A callback with signature: TValue(Int32 val) */ @@ -118,7 +117,7 @@ public: ValueGetter && val_getter); private: - std::unique_ptr bit_reader; + std::unique_ptr bit_reader; std::vector cur_packed_bit_values; std::vector valid_index_steps; @@ -203,7 +202,7 @@ private: /** * Read data according to the format of ColumnLowCardinality format. - * + * * Only index and null column are processed in this class. * And all null value is mapped to first index in dictionary, * so the result index valued is added by one. @@ -232,7 +231,7 @@ private: /** * The definition level is RLE or BitPacked encoded, * and the index of dictionary is also RLE or BitPacked encoded. - * + * * while the result is not parsed as a low cardinality column, * instead, a normal column is generated. 
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp
index 00dee9074fe..2e3d329bcd2 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp
+++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -58,7 +59,7 @@ void visitColStrIndexType(size_t data_size, TypeVisitor && visitor)
    }
 }
 
-void reserveColumnStrRows(MutableColumnPtr & col, UInt32 rows_num)
+void reserveColumnStrRows(MutableColumnPtr & col, UInt64 rows_num)
 {
    col->reserve(rows_num);
 
@@ -212,7 +213,7 @@ ParquetLeafColReader::ParquetLeafColReader(
 }
 
 template
-ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt32 rows_num, const String & name)
+ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt64 rows_num, const String & name)
 {
    reading_rows_num = rows_num;
    auto readPageIfEmpty = [&]() {
@@ -228,7 +229,7 @@ ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt32 rows_num,
        // if dictionary page encountered, another page should be read
        readPageIfEmpty();
 
-        auto read_values = std::min(rows_num, cur_page_values);
+        auto read_values = static_cast<UInt32>(std::min(rows_num, static_cast<UInt64>(cur_page_values)));
        data_values_reader->readBatch(column, *null_map, read_values);
 
        cur_page_values -= read_values;
@@ -239,7 +240,7 @@ ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt32 rows_num,
 }
 
 template <>
-void ParquetLeafColReader::resetColumn(UInt32 rows_num)
+void ParquetLeafColReader::resetColumn(UInt64 rows_num)
 {
    if (reading_low_cardinality)
    {
@@ -261,7 +262,7 @@ void ParquetLeafColReader::resetColumn(UInt32 rows_num)
 }
 
 template
-void ParquetLeafColReader::resetColumn(UInt32 rows_num)
+void ParquetLeafColReader::resetColumn(UInt64 rows_num)
 {
    assert(!reading_low_cardinality);
 
@@ -403,9 +404,9 @@ void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page)
    assert(col_descriptor.max_definition_level() >= 0);
    std::unique_ptr def_level_reader;
    if (col_descriptor.max_definition_level() > 0)
    {
-        auto bit_width = arrow::BitUtil::Log2(col_descriptor.max_definition_level() + 1);
+        auto bit_width = arrow::bit_util::Log2(col_descriptor.max_definition_level() + 1);
        auto num_bytes = ::arrow::util::SafeLoadAs(buffer);
-        auto bit_reader = std::make_unique<arrow::BitUtil::BitReader>(buffer + 4, num_bytes);
+        auto bit_reader = std::make_unique<arrow::bit_util::BitReader>(buffer + 4, num_bytes);
        num_bytes += 4;
        buffer += num_bytes;
        max_size -= num_bytes;
@@ -447,7 +448,7 @@ void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page)
            // refer to: DictDecoderImpl::SetData in encoding.cc
            auto bit_width = *buffer;
-            auto bit_reader = std::make_unique<arrow::BitUtil::BitReader>(++buffer, --max_size);
+            auto bit_reader = std::make_unique<arrow::bit_util::BitReader>(++buffer, --max_size);
            data_values_reader = createDictReader(
                std::move(def_level_reader), std::make_unique(std::move(bit_reader), bit_width));
            break;
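readPageV1 above derives the definition-level decoder from a length-prefixed region at the start of the page. A reduced sketch of that layout follows, under the same little-endian assumption the reader enforces with a static_assert elsewhere; it has no bounds checking, and PageSlices/splitDataPageV1 are illustrative names, not ClickHouse or Arrow API.

#include <cstdint>
#include <cstring>

// Layout of a DataPage v1 when max_definition_level > 0: a 4-byte
// little-endian size prefix, then that many bytes of RLE/bit-packed
// definition levels, then the encoded values.
struct PageSlices
{
    const uint8_t * def_levels; // RLE/bit-packed definition-level bytes
    uint32_t def_levels_size;
    const uint8_t * values;     // encoded values start right after
};

PageSlices splitDataPageV1(const uint8_t * page_data)
{
    uint32_t size;
    std::memcpy(&size, page_data, sizeof(size)); // same job as SafeLoadAs above
    return {page_data + 4, size, page_data + 4 + size};
}

The level decoder's bit width is then Log2(max_definition_level + 1), which is exactly what the arrow::bit_util::Log2 call in the hunk computes.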
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h
index f730afe40ed..c5b14132f17 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h
@@ -1,6 +1,5 @@
 #pragma once
 
-#include 
 #include 
 #include 
@@ -28,7 +27,7 @@ public:
        std::unique_ptr meta_,
        std::unique_ptr reader_);
 
-    ColumnWithTypeAndName readBatch(UInt32 rows_num, const String & name) override;
+    ColumnWithTypeAndName readBatch(UInt64 rows_num, const String & name) override;
 
 private:
    const parquet::ColumnDescriptor & col_descriptor;
@@ -42,13 +41,13 @@ private:
    ColumnPtr dictionary;
 
+    UInt64 reading_rows_num = 0;
    UInt32 cur_page_values = 0;
-    UInt32 reading_rows_num = 0;
 
    bool reading_low_cardinality = false;
 
    Poco::Logger * log;
 
-    void resetColumn(UInt32 rows_num);
+    void resetColumn(UInt64 rows_num);
    void degradeDictionary();
    ColumnWithTypeAndName releaseColumn(const String & name);
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
index a5744b85174..9ff4a7a16aa 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
+++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -30,21 +31,14 @@ namespace ErrorCodes
    extern const int PARQUET_EXCEPTION;
 }
 
-// #define THROW_ARROW_NOT_OK(status) \
-//     do \
-//     { \
-//         if (::arrow::Status _s = (status); !_s.ok()) \
-//             throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \
-//     } while (false)
-
-
 #define THROW_PARQUET_EXCEPTION(s) \
    do \
    { \
        try { (s); } \
        catch (const ::parquet::ParquetException & e) \
        { \
-            throw Exception(e.what(), ErrorCodes::PARQUET_EXCEPTION); \
+            auto msg = PreformattedMessage::create("Exception when reading parquet: {}", e.what()); \
+            throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); \
        } \
    } while (false)
 
@@ -172,13 +166,14 @@ ParquetRecordReader::ParquetRecordReader(
        auto idx = file_reader->metadata()->schema()->ColumnIndex(col_with_name.name);
        if (idx < 0)
        {
-            throw Exception("can not find column with name: " + col_with_name.name, ErrorCodes::BAD_ARGUMENTS);
+            auto msg = PreformattedMessage::create("cannot find column with name: {}", col_with_name.name);
+            throw Exception(std::move(msg), ErrorCodes::BAD_ARGUMENTS);
        }
        parquet_col_indice.push_back(idx);
    }
 }
 
-Chunk ParquetRecordReader::readChunk(UInt32 num_rows)
+Chunk ParquetRecordReader::readChunk(size_t num_rows)
 {
    if (!left_rows)
    {
@@ -190,7 +185,7 @@ Chunk ParquetRecordReader::readChunk(UInt32 num_rows)
    }
 
    Columns columns(header.columns());
-    auto num_rows_read = std::min(static_cast(num_rows), cur_row_group_left_rows);
+    auto num_rows_read = std::min(num_rows, cur_row_group_left_rows);
    for (size_t i = 0; i < header.columns(); i++)
    {
        columns[i] = castColumn(
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h
index d77cab6553b..69cdaa5ccb7 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h
@@ -1,9 +1,8 @@
 #pragma once
 
-#include 
 #include 
 #include 
-#include 
+#include 
 
 #include 
 #include 
@@ -22,8 +21,8 @@ public:
        std::shared_ptr<::arrow::io::RandomAccessFile> file,
        const parquet::ReaderProperties& properties);
 
-    Chunk readChunk(UInt32 num_rows);
-
+    Chunk readChunk(size_t num_rows);
+
    // follow the scale generated by spark
    static constexpr UInt8 default_datetime64_scale = 9;
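The THROW_PARQUET_EXCEPTION macro above exists to translate parquet::ParquetException into a ClickHouse Exception uniformly at every call site. Stripped of the macro machinery and of PreformattedMessage, the pattern reduces to the following sketch; ParquetReadError and rethrowingParquetErrors are illustrative stand-ins, not the actual types.

#include <stdexcept>
#include <string>
#include <utility>

// Minimal stand-in for the DB::Exception + PreformattedMessage pair.
struct ParquetReadError : std::runtime_error
{
    explicit ParquetReadError(std::string msg) : std::runtime_error(std::move(msg)) {}
};

template <typename Fn>
void rethrowingParquetErrors(Fn && fn)
{
    try
    {
        fn(); // any library call, e.g. opening the file reader
    }
    catch (const std::exception & e) // stands in for parquet::ParquetException
    {
        throw ParquetReadError(std::string("Exception when reading parquet: ") + e.what());
    }
}

The real code keeps this as a macro rather than a template so the wrapped statement can appear verbatim at the call site, assignments included.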
From dbdff6c038834f973d803f44ef096b6015d09e3b Mon Sep 17 00:00:00 2001
From: copperybean
Date: Sun, 28 Jan 2024 09:56:36 +0800
Subject: [PATCH 151/651] support reading simple types by native parquet
 reader

Change-Id: I38b8368b022263d9a71cb3f3e9fdad5d6ca26753
---
 src/Core/Settings.h                           |   1 +
 src/Formats/FormatFactory.cpp                 |   1 +
 src/Formats/FormatSettings.h                  |   1 +
 .../Formats/Impl/Parquet/ParquetDataBuffer.h  |   2 +-
 .../Impl/Parquet/ParquetLeafColReader.cpp     |  12 +-
 .../Impl/Parquet/ParquetRecordReader.cpp      |  73 +++++++----
 .../Impl/Parquet/ParquetRecordReader.h        |  14 ++-
 .../Formats/Impl/ParquetBlockInputFormat.cpp  | 118 ++++++++++++------
 .../Formats/Impl/ParquetBlockInputFormat.h    |   4 +
 9 files changed, 153 insertions(+), 73 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 4a0de354a03..2465164e912 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -1013,6 +1013,7 @@ class IColumn;
    M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
    M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \
    M(Bool, input_format_parquet_filter_push_down, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \
+    M(Bool, input_format_parquet_use_native_reader, false, "When reading Parquet files, use the native reader instead of the arrow reader.", 0) \
    M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
    M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \
    M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 43ccee173f0..557b49d2a0a 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -154,6 +154,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
    format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
    format_settings.parquet.preserve_order = settings.input_format_parquet_preserve_order;
    format_settings.parquet.filter_push_down = settings.input_format_parquet_filter_push_down;
+    format_settings.parquet.use_native_reader = settings.input_format_parquet_use_native_reader;
    format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
    format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference;
    format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index d5fedf99adb..0ac4ea5e0fb 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -258,6 +258,7 @@ struct FormatSettings
        bool skip_columns_with_unsupported_types_in_schema_inference = false;
        bool case_insensitive_column_matching = false;
        bool filter_push_down = true;
+        bool use_native_reader = false;
        std::unordered_set skip_row_groups = {};
        bool output_string_as_string = false;
        bool output_fixed_string_as_fixed_byte_array = true;
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
index be9710e1726..d4956f83092 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
@@ -34,7 +34,7 @@ public:
    void ALWAYS_INLINE readValue(TValue & dst)
    {
        checkAvaible(sizeof(TValue));
-        dst = *reinterpret_cast<const TValue *>(data);
+        dst = *(reinterpret_cast<const TValue *>(data));
        consume(sizeof(TValue));
    }
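The readValue change just above is purely cosmetic (parentheses around the cast). It is worth noting, though, that when the underlying buffer is not guaranteed to be aligned for TValue, a memcpy-based read is the strictly well-defined alternative for trivially copyable types. A sketch of that variant, not what the file actually does:

#include <cstring>
#include <type_traits>

template <typename TValue>
void readValueUnaligned(const char *& data, TValue & dst)
{
    static_assert(std::is_trivially_copyable_v<TValue>);
    std::memcpy(&dst, data, sizeof(TValue)); // defined behaviour even for unaligned data
    data += sizeof(TValue);
}

Compilers typically lower this memcpy to the same single load on platforms that tolerate unaligned access, so the dereference buys no performance over it.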
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp
index 2e3d329bcd2..e2677d7cae3 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp
+++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp
@@ -274,7 +274,14 @@ void ParquetLeafColReader::resetColumn(UInt64 rows_num)
 template
 void ParquetLeafColReader::degradeDictionary()
 {
+    // If the last batch read all dictionary indices, degrading is not needed this time
+    if (!column)
+    {
+        dictionary = nullptr;
+        return;
+    }
    assert(dictionary && column->size());
+
    null_map = std::make_unique(reading_rows_num);
    auto col_existing = std::move(column);
    column = ColumnString::create();
@@ -304,7 +311,8 @@ void ParquetLeafColReader::degradeDictionary()
            col_dest.getOffsets()[i] = col_dest.getChars().size();
        }
    });
-    LOG_INFO(log, "degraded dictionary to normal column");
+    dictionary = nullptr;
+    LOG_DEBUG(log, "degraded dictionary to normal column");
 }
 
 template
@@ -364,7 +372,7 @@ void ParquetLeafColReader::readPage()
                throw Exception(
                    ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary page encoding {}", dict_page.encoding());
        }
-        LOG_INFO(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name());
+        LOG_DEBUG(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name());
 
        dictionary = readDictPage(dict_page, col_descriptor, base_data_type);
        if (std::is_same_v)
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
index 9ff4a7a16aa..42f131ff794 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
+++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp
@@ -31,31 +31,29 @@ namespace ErrorCodes
    extern const int PARQUET_EXCEPTION;
 }
 
-#define THROW_PARQUET_EXCEPTION(s) \
-    do \
-    { \
-        try { (s); } \
-        catch (const ::parquet::ParquetException & e) \
-        { \
+#define THROW_PARQUET_EXCEPTION(s)                                      \
+    do                                                                  \
+    {                                                                   \
+        try { (s); }                                                    \
+        catch (const ::parquet::ParquetException & e)                   \
+        {                                                               \
            auto msg = PreformattedMessage::create("Exception when reading parquet: {}", e.what()); \
            throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); \
-        } \
+        }                                                               \
    } while (false)
 
 namespace
 {
 
-Int64 getTotalRows(const parquet::FileMetaData & meta_data)
+std::unique_ptr createFileReader(
+    std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file)
 {
-    Int64 res = 0;
-    for (int i = 0; i < meta_data.num_row_groups(); i++)
-    {
-        res += meta_data.RowGroup(i)->num_rows();
-    }
+    std::unique_ptr res;
+    THROW_PARQUET_EXCEPTION(res = parquet::ParquetFileReader::Open(std::move(arrow_file)));
    return res;
 }
 
-std::unique_ptr createReader(
+std::unique_ptr createColReader(
    const parquet::ColumnDescriptor & col_descriptor,
    DataTypePtr ch_type,
    std::unique_ptr meta,
@@ -86,7 +84,7 @@ std::unique_ptr createReader(
        }
        case parquet::Type::FIXED_LEN_BYTE_ARRAY:
        {
-            if (col_descriptor.type_length() <= static_cast(DecimalUtils::max_precision))
+            if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128)))
            {
                auto data_type = std::make_shared(
                    col_descriptor.type_precision(), col_descriptor.type_scale());
@@ -148,16 +146,21 @@ std::unique_ptr createReader(
 
 ParquetRecordReader::ParquetRecordReader(
    Block header_,
-    std::shared_ptr<::arrow::io::RandomAccessFile> file,
-    const parquet::ReaderProperties& properties)
-    : header(std::move(header_))
+    parquet::ArrowReaderProperties reader_properties_,
+    std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file,
+    const FormatSettings & format_settings,
+    std::vector row_groups_indices_)
+    : file_reader(createFileReader(std::move(arrow_file)))
+    , reader_properties(reader_properties_)
+    , header(std::move(header_))
+    , max_block_size(format_settings.parquet.max_block_size)
+    , row_groups_indices(std::move(row_groups_indices_))
+    , left_rows(getTotalRows(*file_reader->metadata()))
 {
    // Only little endian system is supported currently
    static_assert(std::endian::native == std::endian::little);
 
    log = &Poco::Logger::get("ParquetRecordReader");
 
-    THROW_PARQUET_EXCEPTION(file_reader = parquet::ParquetFileReader::Open(std::move(file), properties));
-    left_rows = getTotalRows(*file_reader->metadata());
 
    parquet_col_indice.reserve(header.columns());
    column_readers.reserve(header.columns());
@@ -167,13 +170,18 @@ ParquetRecordReader::ParquetRecordReader(
        auto idx = file_reader->metadata()->schema()->ColumnIndex(col_with_name.name);
        if (idx < 0)
        {
            auto msg = PreformattedMessage::create("cannot find column with name: {}", col_with_name.name);
-            throw Exception(std::move(msg), ErrorCodes::BAD_ARGUMENTS);
+            throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION);
        }
        parquet_col_indice.push_back(idx);
    }
+    if (reader_properties.pre_buffer())
+    {
+        THROW_PARQUET_EXCEPTION(file_reader->PreBuffer(
+            row_groups_indices, parquet_col_indice, reader_properties.io_context(), reader_properties.cache_options()));
+    }
 }
 
-Chunk ParquetRecordReader::readChunk(size_t num_rows)
+Chunk ParquetRecordReader::readChunk()
 {
    if (!left_rows)
    {
@@ -185,7 +193,7 @@ Chunk ParquetRecordReader::readChunk(size_t num_rows)
    }
 
    Columns columns(header.columns());
-    auto num_rows_read = std::min(num_rows, cur_row_group_left_rows);
+    auto num_rows_read = std::min(max_block_size, cur_row_group_left_rows);
    for (size_t i = 0; i < header.columns(); i++)
    {
        columns[i] = castColumn(
@@ -201,20 +209,33 @@
 void ParquetRecordReader::loadNextRowGroup()
 {
    Stopwatch watch(CLOCK_MONOTONIC);
-    cur_row_group_reader = file_reader->RowGroup(next_row_group_idx);
+    cur_row_group_reader = file_reader->RowGroup(row_groups_indices[next_row_group_idx]);
 
    column_readers.clear();
    for (size_t i = 0; i < parquet_col_indice.size(); i++)
    {
-        column_readers.emplace_back(createReader(
+        column_readers.emplace_back(createColReader(
            *file_reader->metadata()->schema()->Column(parquet_col_indice[i]),
            header.getByPosition(i).type,
            cur_row_group_reader->metadata()->ColumnChunk(parquet_col_indice[i]),
            cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i])));
    }
-    LOG_DEBUG(log, "reading row group {} consumed {} ms", next_row_group_idx, watch.elapsedNanoseconds() / 1e6);
+
+    auto duration = watch.elapsedNanoseconds() / 1e6;
+    LOG_DEBUG(log, "reading row group {} consumed {} ms", row_groups_indices[next_row_group_idx], duration);
+
    ++next_row_group_idx;
    cur_row_group_left_rows = cur_row_group_reader->metadata()->num_rows();
 }
 
+Int64 ParquetRecordReader::getTotalRows(const parquet::FileMetaData & meta_data)
+{
+    Int64 res = 0;
+    for (size_t i = 0; i < row_groups_indices.size(); i++)
+    {
+        res += meta_data.RowGroup(row_groups_indices[i])->num_rows();
+    }
+    return res;
+}
+
 }
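With this change the reader owns its batching: readChunk() no longer takes a row count, sizes batches from format_settings.parquet.max_block_size, and signals exhaustion of the selected row groups with an empty Chunk (the same `if (!chunk)` test the input format performs further down). A compilable sketch of the driving loop, with stand-in types in place of DB::ParquetRecordReader and DB::Chunk:

#include <cstddef>
#include <utility>

// Stand-ins so the sketch compiles on its own; in the series itself these
// roles are played by DB::ParquetRecordReader and DB::Chunk.
struct Chunk
{
    std::size_t num_rows = 0;
    explicit operator bool() const { return num_rows != 0; }
};

struct Reader
{
    Chunk readChunk(); // batch size now comes from max_block_size internally
};

void consume(Chunk chunk); // placeholder for downstream processing

void drainAllRowGroups(Reader & reader)
{
    for (;;)
    {
        Chunk chunk = reader.readChunk();
        if (!chunk) // empty chunk: selected row groups are exhausted
            break;
        consume(std::move(chunk));
    }
}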
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h
index 69cdaa5ccb7..4789be59ec8 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include 
+#include 
 #include 
 #include 
@@ -18,23 +19,29 @@ class ParquetRecordReader
 public:
    ParquetRecordReader(
        Block header_,
-        std::shared_ptr<::arrow::io::RandomAccessFile> file,
-        const parquet::ReaderProperties& properties);
+        parquet::ArrowReaderProperties reader_properties_,
+        std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file,
+        const FormatSettings & format_settings,
+        std::vector row_groups_indices_);
 
-    Chunk readChunk(size_t num_rows);
+    Chunk readChunk();
 
    // follow the scale generated by spark
    static constexpr UInt8 default_datetime64_scale = 9;
 
 private:
    std::unique_ptr file_reader;
+    parquet::ArrowReaderProperties reader_properties;
 
    Block header;
 
    std::shared_ptr cur_row_group_reader;
    ParquetColReaders column_readers;
 
+    UInt64 max_block_size;
+
    std::vector parquet_col_indice;
+    std::vector row_groups_indices;
    UInt64 left_rows;
    UInt64 cur_row_group_left_rows = 0;
    int next_row_group_idx = 0;
@@ -42,6 +49,7 @@ private:
    Poco::Logger * log;
 
    void loadNextRowGroup();
+    Int64 getTotalRows(const parquet::FileMetaData & meta_data);
 };
 
 }
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index d41cb3447de..e35d53dc4f4 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace CurrentMetrics
 {
@@ -392,6 +393,8 @@ void ParquetBlockInputFormat::initializeIfNeeded()
 {
    if (std::exchange(is_initialized, true))
        return;
+    if (format_settings.parquet.use_native_reader)
+        LOG_INFO(&Poco::Logger::get("ParquetBlockInputFormat"), "using native parquet reader");
 
    // Create arrow file adapter.
    // TODO: Make the adapter do prefetching on IO threads, based on the full set of ranges that
@@ -479,23 +482,35 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat
    if (metadata->writer_version().VersionLt(parquet::ApplicationVersion::PARQUET_816_FIXED_VERSION()))
        properties.set_pre_buffer(false);
 
-    parquet::arrow::FileReaderBuilder builder;
-    THROW_ARROW_NOT_OK(
-        builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata));
-    builder.properties(properties);
-    // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators.
-    THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader));
+    if (format_settings.parquet.use_native_reader)
+    {
+        row_group_batch.native_record_reader = std::make_shared(
+            getPort().getHeader(),
+            std::move(properties),
+            arrow_file,
+            format_settings,
+            row_group_batch.row_groups_idxs);
+    }
+    else
+    {
+        parquet::arrow::FileReaderBuilder builder;
+        THROW_ARROW_NOT_OK(
+            builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata));
+        builder.properties(properties);
+        // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators.
+        THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader));
 
-    THROW_ARROW_NOT_OK(
-        row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader));
+        THROW_ARROW_NOT_OK(
+            row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader));
 
-    row_group_batch.arrow_column_to_ch_column = std::make_unique(
-        getPort().getHeader(),
-        "Parquet",
-        format_settings.parquet.allow_missing_columns,
-        format_settings.null_as_default,
-        format_settings.date_time_overflow_behavior,
-        format_settings.parquet.case_insensitive_column_matching);
+        row_group_batch.arrow_column_to_ch_column = std::make_unique(
+            getPort().getHeader(),
+            "Parquet",
+            format_settings.parquet.allow_missing_columns,
+            format_settings.null_as_default,
+            format_settings.date_time_overflow_behavior,
+            format_settings.parquet.case_insensitive_column_matching);
+    }
 }
 
 void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_batch_idx)
@@ -561,6 +576,7 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un
    lock.unlock();
 
    auto end_of_row_group = [&] {
+        row_group_batch.native_record_reader.reset();
        row_group_batch.arrow_column_to_ch_column.reset();
        row_group_batch.record_batch_reader.reset();
        row_group_batch.file_reader.reset();
@@ -573,35 +589,55 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un
        // reached. Wake up read() instead.
        condvar.notify_all();
    };
-
-    if (!row_group_batch.record_batch_reader)
-        initializeRowGroupBatchReader(row_group_batch_idx);
-
-    auto batch = row_group_batch.record_batch_reader->Next();
-    if (!batch.ok())
-        throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString());
-
-    if (!*batch)
+    auto get_pending_chunk = [&](size_t num_rows, Chunk chunk = {})
    {
-        end_of_row_group();
-        return;
-    }
-
-    auto tmp_table = arrow::Table::FromRecordBatches({*batch});
-
-    size_t approx_chunk_original_size = static_cast(std::ceil(static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * (*tmp_table)->num_rows()));
-    PendingChunk res = {
-        .chunk = {},
-        .block_missing_values = {},
-        .chunk_idx = row_group_batch.next_chunk_idx,
-        .row_group_batch_idx = row_group_batch_idx,
-        .approx_original_chunk_size = approx_chunk_original_size
+        size_t approx_chunk_original_size = static_cast(std::ceil(
+            static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * num_rows));
+        return PendingChunk{
+            .chunk = std::move(chunk),
+            .block_missing_values = {},
+            .chunk_idx = row_group_batch.next_chunk_idx,
+            .row_group_batch_idx = row_group_batch_idx,
+            .approx_original_chunk_size = approx_chunk_original_size
+        };
    };
 
-    /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
-    /// Otherwise fill the missing columns with zero values of its type.
-    BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &res.block_missing_values : nullptr;
-    res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr);
+    if (!row_group_batch.record_batch_reader && !row_group_batch.native_record_reader)
+        initializeRowGroupBatchReader(row_group_batch_idx);
+
+    PendingChunk res;
+    if (format_settings.parquet.use_native_reader)
+    {
+        auto chunk = row_group_batch.native_record_reader->readChunk();
+        if (!chunk)
+        {
+            end_of_row_group();
+            return;
+        }
+
+        auto num_rows = chunk.getNumRows();
+        res = get_pending_chunk(num_rows, std::move(chunk));
+    }
+    else
+    {
+        auto batch = row_group_batch.record_batch_reader->Next();
+        if (!batch.ok())
+            throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString());
+
+        if (!*batch)
+        {
+            end_of_row_group();
+            return;
+        }
+
+        auto tmp_table = arrow::Table::FromRecordBatches({*batch});
+        res = get_pending_chunk((*tmp_table)->num_rows());
+
+        /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
+        /// Otherwise fill the missing columns with zero values of its type.
+        BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &res.block_missing_values : nullptr;
+        res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr);
+    }
 
    lock.lock();
 
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
index b5b884b5efa..a737c695fd6 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
@@ -16,6 +16,7 @@ namespace DB
 {
 
 class ArrowColumnToCHColumn;
+class ParquetRecordReader;
 
 // Parquet files contain a metadata block with the following information:
 //  * list of columns,
@@ -210,6 +211,9 @@ private:
        std::vector row_groups_idxs;
 
        // These are only used by the decoding thread, so don't require locking the mutex.
+        // If use_native_reader is set, only native_record_reader is used;
+        // otherwise, native_record_reader is not used.
+ std::shared_ptr native_record_reader; std::unique_ptr file_reader; std::shared_ptr record_batch_reader; std::unique_ptr arrow_column_to_ch_column; From 8172f6cec023df144ef20a7cfd49b43548cefd41 Mon Sep 17 00:00:00 2001 From: copperybean Date: Wed, 21 Feb 2024 00:17:30 +0800 Subject: [PATCH 152/651] log duration while reading parquet Change-Id: If79741b7456667a8dde3e355d9dc684c2dd84f4f --- .../Formats/Impl/ParquetBlockInputFormat.cpp | 11 +++++++++++ src/Processors/Formats/Impl/ParquetBlockInputFormat.h | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index e35d53dc4f4..7faa7300416 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -673,6 +673,15 @@ void ParquetBlockInputFormat::scheduleMoreWorkIfNeeded(std::optional row } } +Chunk ParquetBlockInputFormat::generate() +{ + auto res = IInputFormat::generate(); + if (!res) + LOG_INFO(&Poco::Logger::get("ParquetBlockInputFormat"), "{} ms consumed by reading parquet file", consumed_nanosecs / 1e6); + + return res; +} + Chunk ParquetBlockInputFormat::read() { initializeIfNeeded(); @@ -683,6 +692,8 @@ Chunk ParquetBlockInputFormat::read() if (need_only_count) return getChunkForCount(row_group_batches[row_group_batches_completed++].total_rows); + Stopwatch watch(CLOCK_MONOTONIC); + SCOPE_EXIT({ consumed_nanosecs += watch.elapsedNanoseconds(); }); std::unique_lock lock(mutex); while (true) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index a737c695fd6..a94637da942 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -65,6 +65,8 @@ public: size_t getApproxBytesReadForChunk() const override { return previous_approx_bytes_read_for_chunk; } + Chunk generate() override; + private: Chunk read() override; @@ -286,6 +288,8 @@ private: std::exception_ptr background_exception = nullptr; std::atomic is_stopped{0}; bool is_initialized = false; + + UInt64 consumed_nanosecs = 0; }; class ParquetSchemaReader : public ISchemaReader From e0179150c1671f75f9480ebca17c4ea2595ae811 Mon Sep 17 00:00:00 2001 From: copperybean Date: Fri, 23 Feb 2024 01:09:02 +0800 Subject: [PATCH 153/651] Revert "log duration while reading parquet" This reverts commit 5df94b7f8955b541ae37e4bbdc13a1fec9ddbbd9. 
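For reference, the probe that these two commits add and then revert boils down to scoped accumulation of elapsed time: a Stopwatch starts on entry to read() and SCOPE_EXIT adds the elapsed nanoseconds on every return path. A reduced sketch using only the standard library, where ScopedTimer stands in for the Stopwatch/SCOPE_EXIT pair:

#include <chrono>
#include <cstdint>

// Accumulates elapsed wall time into a counter on every exit path, which is
// what Stopwatch + SCOPE_EXIT achieve in the (now reverted) read() hook.
struct ScopedTimer
{
    explicit ScopedTimer(std::uint64_t & total_ns_) : total_ns(total_ns_) {}
    ~ScopedTimer()
    {
        auto elapsed = std::chrono::steady_clock::now() - start;
        total_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
    }

    std::uint64_t & total_ns;
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
};

std::uint64_t consumed_nanosecs = 0;

void readOnce()
{
    ScopedTimer timer(consumed_nanosecs);
    // ... the actual chunk reading would happen here ...
}

The destructor-based form guarantees the counter is updated even on early returns and exceptions, which is why the original hook wrapped read() rather than instrumenting each return site.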
--- .../Formats/Impl/ParquetBlockInputFormat.cpp | 11 ----------- src/Processors/Formats/Impl/ParquetBlockInputFormat.h | 4 ---- 2 files changed, 15 deletions(-) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 7faa7300416..e35d53dc4f4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -673,15 +673,6 @@ void ParquetBlockInputFormat::scheduleMoreWorkIfNeeded(std::optional row } } -Chunk ParquetBlockInputFormat::generate() -{ - auto res = IInputFormat::generate(); - if (!res) - LOG_INFO(&Poco::Logger::get("ParquetBlockInputFormat"), "{} ms consumed by reading parquet file", consumed_nanosecs / 1e6); - - return res; -} - Chunk ParquetBlockInputFormat::read() { initializeIfNeeded(); @@ -692,8 +683,6 @@ Chunk ParquetBlockInputFormat::read() if (need_only_count) return getChunkForCount(row_group_batches[row_group_batches_completed++].total_rows); - Stopwatch watch(CLOCK_MONOTONIC); - SCOPE_EXIT({ consumed_nanosecs += watch.elapsedNanoseconds(); }); std::unique_lock lock(mutex); while (true) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index a94637da942..a737c695fd6 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -65,8 +65,6 @@ public: size_t getApproxBytesReadForChunk() const override { return previous_approx_bytes_read_for_chunk; } - Chunk generate() override; - private: Chunk read() override; @@ -288,8 +286,6 @@ private: std::exception_ptr background_exception = nullptr; std::atomic is_stopped{0}; bool is_initialized = false; - - UInt64 consumed_nanosecs = 0; }; class ParquetSchemaReader : public ISchemaReader From 18b3ebcda363eb7e9b8f52c7170d8bc208bb9b07 Mon Sep 17 00:00:00 2001 From: copperybean Date: Fri, 23 Feb 2024 01:10:22 +0800 Subject: [PATCH 154/651] add test Change-Id: I53ade40ba24a742a21f9e09dbab7fff90b032b4b --- .../02998_native_parquet_reader.parquet | Bin 0 -> 76392 bytes .../02998_native_parquet_reader.reference | 2000 +++++++++++++++++ .../02998_native_parquet_reader.sh | 210 ++ 3 files changed, 2210 insertions(+) create mode 100644 tests/queries/0_stateless/02998_native_parquet_reader.parquet create mode 100644 tests/queries/0_stateless/02998_native_parquet_reader.reference create mode 100755 tests/queries/0_stateless/02998_native_parquet_reader.sh diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.parquet b/tests/queries/0_stateless/02998_native_parquet_reader.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c0d222342e31969fd5e6b4fb0fd8d0ecd4a822bc GIT binary patch literal 76392 zcmeFa2UHZ<*7jdjLRG1#7*NcZF=GH4V*>`vn7UfKN)ZzV6f^rubDC=l>mf^Z- zqw=2B(?(r=cDA9rxJhB;L3FaI0sDdNA@0=`ljj(2m|~TW>|lDI&YqoX=qYYiEMx_Dy%Hgw@r=N&ukx2)p2rw;f`sW@`W8j z$LpG~3k;s(c7>OR(sxZw*{^J0alfv(ywGsZv_tvEdeH}T&DceTe&SB0gglH+F*Rqu zv;D<`x{~r@!+len@`LrJ59u7(C58dwF2zb7PCqcUU@2~(n4l{qFEugLPZjg9P zS5^)-q?-0BmYffLT<64w7zT^`lydSI+H7)WSW)MY1 zsUVM|(@icc&q-pU&PEP5WSCT?I5(a?rE9}R7>0=PN=4b1&NQ`UOK?NQ)4ED>q~WP) zzfzK$K%ddIW0xDe!~=@0Jdu88YR_76!^E??%JK@sbJIbk6z4~u({*518ob3rN)>q$ zon`9Cmga_w=XF)(RfZR)1f>i&nZBUYva1av#KTH8c?z9va%HW#kz$grx*TQ5F&$CL za#QJxIyW}jFiJeC)R3pqxh8kE95-6Lq^l{%81hWVl=9qk`m(MQyT;%n9#`z-8T3n2 zXSM=2M!ce{C9gHSGM!LtIDh)8t_!=)FjhRN)Rt$``KGRHMQ)sUO;<-=Z+LA=R4Q?^ z=<7NUc7tKOcuJ`&&!*p)y0NyLub8Z>CvP;oHJw%}b93k$y6)^I!vyh+QeU1+zcck< 
zt8f#=o4N*atl_=stWuSmN8i%*WH%f9#B++hJfHqx>cv*$CW*Io4dpF{kEZiVbuNIu zqwCFXHB1&SD2?O=bb+Z4TZ5Y--qkghw;4W}l9Za_@XG~j~iRGpqR8s>;Ml-67u!jwS;$uZCN7JR1k!))&NX*r_%0~=klqZTK7enXiMzKc?!D6c7Cavd z7b3pYxy#24WfimH%&nzg>3rDZhEOq0=_IeC%PC`64HqWn>pIIP4CR${#f4i>zt)Xq zPa49-45f>_fv%v8W7}{M;u~F8IniLFWGZdBjr3dHc=nVbQhcg-$eZYjiZ9!aTQ0uS zb(2pUDk;yD_FOFeUN?a~V^|?RSGvoaX@wc#IL%(@+Ct}<)zY@i=)5krn8p~YsFVe zKY15zr_5lxaO=eHy8iMNLoFp=>B{YCWn?(y@kbKk7K>48b;!L!qFps@u*dl&Z2FnUAqPvGQ5z%k8I&3k%tMh8^M;Wr%!$ZlWw=`*AzP z62ee9#n4pws`Td$(j|q(?0rL=_)YPW57Et(CF}rhmuMvnlOGtGE8mrYTmoH6Sjs*$ z>=u70-tuAEL0QJ?xIJQNVYvLr&_bb9fjdH%5dzuA23e%l5%N*Gr4q#IIiqMTjFg`k zS}BY=h&x7?6@uAR!(OqNI!ZoHw^l;f!Q4KvoG@B88ypo2B+v=Eyb#K!8BC(3>LZ_| zos=+E8RFPB1%BN_J62T7P;>C)>I62ecqVVca?lfIV zh-9A{_KU^U@$wnEjk28e;tq(mg0K9{&{ip-4&%$YYH)Jp5d5SR-Gzerrne^tPgiwv=gSu zFAeTWIdu$og{~#6WnURih~?Gk@>RN%vW^|gofK;eGvs_jXQhHVj=M(J5!SP>4T++S z>MviXyC@sj@!Tn~t}s)6W9X_>RDHQ*x}LC+eQP)^R#IolH)s!K6FY%BBi0vY%kK=` z6kBy7cav@)#Io-VXT{3u9QhXAUD?d~apy#PVXpka&_k)BPU3FU4TUZ2N5grssya`; zL-$m+vXi+BVk2R`Twv&>R8yyLcj?B$HujStNvy60$oJ^p%64`tcTsF2ERa7N`Y1Kj zX|*`7tD=LjME-8*uhdp&a*ya1!fy75;hI=ST`E7O2Pk{kS=@E8rLat<#DPj(bvE~e zZY9VpEhdZg)Id3v)+t7I4tGOrEd{9Nb*g;q+TZzLI2Xz^jO?MO$*izynv4y%y&Y``P z!)ze;Skwxu<rHXDsv}`SoR9dSc+)LVB zIL4M0&7z|kBfp|YDaYARE=}wttdYx!qZKDLjLWAx3n$p}V!G(8u9aWYKFUcpoXZfq z2U#MtJytozMsiO@4`G8`Q5>hVQI~V?=x)MkwvzZv zY^!dR-_zrjGwcfPx!7IUB-@I!NR{ls&P|mTdxEEqiVY6IC zoTzkAS91k)FX22}Rm>JUs$1kww4ZW;jpA~|-ojS7nm9?(s?pqMx{r{=Ru^+cS9P2G zg`TWjWMjBI(Nowi*AS;DZt5EDE8SPP#MTsFitg$T`5Qe|xy-KRUWxsLowA)cP3fer zqeE>B?1hJ@;B1AncNBi!+oi>IRNt1`5~MI^r9#tGZjJ8Gq$E zyODb<>V!RVU2&%3p>E7erpj_LW|ne;jpg2pdci2y7iTNo)y8WnzSjHgSW*dqHVlUMsbId&D4!e!}BqEgM zM&f*>x4NC<8A-UyHWojNeNO};EksHhs3yo|n5D{NR_17Fgm74HDK1lVs*$s1MhZ{ZRw9O_9+As3fl4a7 zmn$ZX5{}BP#UMqm?&HcaqXjeTC|XE^)MIjaCRj;hO`N6VBOI5V#1Lh$s&EyUF+w`) zEV7b8Jt5mLp-Kj;a-1|)I4NtyFhx}3xr)p^fr{p$b zgfc`uz}Ygs!ZWt5SV9`Ao|Y>!k;-%SAXieFAe@ogiOUr)^$=HunJ8qj?L{kTn0i*O z%B)acunAl#$xk>ZcMw-9-s)kl8Z$}AW;=?drQzy%xjM5-$zhLhWu(c%1z9VuRz|2t zxf;wAA(wR(t)-D_l3bICQu5eiTv=(Va8Y&>qm@zWan6pJCcI?b#d6YU^^#nRiBVp$ zC%E#`bm6kxNnE4&s3*DF%nTu)?JQQ1#;8~1I?P(-HJixUNdCfAxr?|?8LOV+>M}D0 zSlCrAHc=Xzn2QgBN*6V?m#WzfhN+!@?A4bO*~KWTj$3=`-P=;TiM5Tgpox*KD2-FE z$@LgwWKVOIq*=msnHbse>KU#+Gg}}=)>iUWljQ~sF|ud5%F-O+hD?m?1oa$e&&(Bw zk*y+4RBy@+8DeD5b5*5z!Y!E?SwHmx*NB-f5F=Ynnxx*A8#BbnCUMoJ0O5{IjO=9f zBG-gjAP^&4Lz<%Am76le$X?=VN(+U1GBL7K)yrHnW|2UQterGXO_7^3#K>OZYDtTQ z`!X@I)77h-1G7XRMz*#zLwz8(V2F{u#?_IQ3J+ysWc}6aTuWw|K#Xi%X{P!}Zp9EI zo6Oad0)@viF|xDN8(eE9NFYYGzBF5XB0Dm~$ll}{NWns?OpNRt^%m#Egb2jQ+Dmg) zv+T?eBYT@`D1{1XGBL9A)H|Gp2@{BsZ6wWC(`6Ti7}>jAV<}w7kcp8EQ15YVm5Dt)(C}S9WKJku`IUQjCx%6C)d}rg5E^H3BiRPEv^aQtr$UBb&}S zOKXK!GBL8DY6jPZStk%9tC7Oge7P$_jBF<7BCQu*%f!frt4}!(W`jVCY#S*;eIs{c zh>?B9wUsssZ)IX+Bh}|zcV?47jBGn;x%y7-!4M;x#kH4Wh4(TsvMbaVTu)}RK#Xh$ zX{Gu>?!^!zo6U8Uwg?|(Vq{mTIb3gMt3ZsbR$8qV$bA@MWOF%JX`Ap#CPp?&&Eq_o z?E*2fZc?=RS?}F5F?w0A`m!jBFQao%&rK$Pgp@hU+Tr7JkUY$gWr4ayn*@K#Z)1v_Yke0z-`KJFc6A zH6$Z3vK!U+oSrcX#K?A+HmQtp5JQaY2d;;-S14vAMmARc$PH%p3B<_uls2mtMgv2P zYysCxG6|MOVq~|dpE!|G1Y%@+OIuaeC^5vye&+f}=-Z9N$Zk`=a6_1Qff!j&X}iiB zhcd*-e&zZ~`-S30Vq|xy-#9PkfIy6FKWV30!Z?f}M)o_`Upgq1G!i2lr~cr)nL`3G zvIC@Ds+Dm#LyRoN50nywQbuBAcdIl%f;lV@Bde44sHKe~8DeA^IF}WvRaHE z#T*rgk=08^)!I0kAx74MA0!$nKDC^23`2}8%NwKesTGXl7-D33%od4)jgc5xRV~hsXHE&k$PSU>)rv-6h8Wor{7~t% zP{~M)?0&T*KY=+T5F_g)9Z+qJ6B%M;t@vTmS)sC#7}=2a{OrNl2Fq~jO;PBJU^YeED$5>BOO=mj58QwWGnDvq$@%#BQdfkR2$x( zxhfDNJ61ZW);7*$h>@+xkCUzmb&SNwCaRVAS88-YNQ~@RwJJZ4xg`)I>nEL4?TzypVq~lF zlcd{1LnAS==hfY^2`!Ao$X-+H@qx@^ff(6Y(si|^F^C~Xwmv^wdLpzk5+j?e 
zHsFJqRDl@TInoWawK0StM%JF6E13mHBQdf!)rNd1lO_-&J5Rc$IvK+lVq_cf^QCmb z*+`7+ZM88U&SVJ0$OcGvRE;r$Ax5?dzd*_qT#UrX-c_6Ok<3$p7}>l{FO#x`jz(f+AE~YQC?-cBMmA7-tZI$X3^B5;`5-A*a5WMm`$To*W0*XF z7};PcRdqA2VTh4+;zOjDg1eCzS+nZQuVr2d#K?w9X=*3qI))fo4Id`u3!RO`$fm0< z{CeiKK#Xj-l%aMpZeWO!ZNo=MZ-lN!Vq`Pbw){rstw4-yr1VtvFm7Uqk!{B>m);58 zjKs)3Q`_^g%zJ?t*%i`rwYzaMLyT+(ex>w5=wT#AHcRcuZ(%+P#K^9aUZ_2dTNz?x zwft(SK8+|W8X00_yYrhQ25x~E*>`FW zelJr@PmFA=^j_5)_c6rC_T)E97WzR(Vq`z4y?7I2sV7Eui}X<)Y*ZLxWP9^lC01`R z5+hrn_Tg2A(-R}RP5PvY#(0JpSxBl}tH%kO83>xq%wA$?JY7!NSS$oAuR zN+t9|jl{@)Rr~V?nUZ>9WaFf7s+aK)LyYVIewSpWA7&&*_PaWePhd*viILqc{ZPG) zhZ$mIb^IQww0^je7+ES_;Eyn6^u);G`;7Pz#-j`|vU=VqS?fm{iIHXE2l2<4vU*}< z_e#a$M;VVZ#K;cj_etgSqm9JKTErXp6HIwMF|sDfGTz5{k|9P`jQSw4O!f10VJCq{O^R6KsX@eD(ZtQUVkveo+< ziIFW4Ka4-iRMrzCdr&GFKf!p8Ax74lKO|MrPc#xEYZX77KhIRv6C;}-m5TQ>USNok z9l;-#s_7>giIFWGKax*ks_Ti7JtCEfpKQFy5FgkD*JuOv=pJlwk5F@}%Q{8Hm%h8Wpd{B^0NewmRN*}Czw`6o;(Ju$M$QoZ;>U1v z)LI{8Bu2J={9NA5IO>U!y(u+_4>qPT#K_L$Z%I!25F;_N_VM%ibjDdvjO=ZxVSK1D zgCRyXfWISY^kGI~WE;gV;4>K)Ju$L(rN;5$#-|K1vJ3fpQX74Qkr>$~@r(FpOj|uM zvMEy2_(UrCH&pi}y=$_TALpQ8$TQ&4< z;=SNdeecDXKV;`U?a-Vv@A|Il@-y52Jn!HjM!VY8R9`5F~|&$7^rTKxQmHNrA~OeMI{gBEa~-^o|3`-u-- zVM)v=O;3vEOxWZOQcIOqt-Z7oY9$k4;>#Wl+n*7i;i-kv`!)$Exf?+ifw6 zRt%O8+tp~89~&1nenK!;qMH5MdMnrEE}*!eXGz=YkFyK>-h=x2&%qv)#oV#qo6`UI zXLnP&?ysBE6ommtmatjR*sT3xZdP=>ftLBhtjTywV1OUU_rl%r(+C6o1MGh>?thH) zNDqpd`;Rf=U@wOG+ecD3Ox4%sO&y!tiQ67o6*c+yoiybz%uF)OeDaB2p)KnM-~cCm zrc9tYe)0%^yw03D3a?Wqj>YT9nN#sPZL~LD{l-nl>#%8~@#-^t242U{n2guyKEr9w zVuZh?hVm~yfErV305uLj!{|Er^>H*ujW0F8!h1Sh+F}@vEJLZ`Q2CY`P&}0PpE8ne z#E%&<5h_3Y&L~{LpU{lP(WFAn@EK!h4e#qSh2~0`E&Rvg!qh^^I2=tYl#KTFr8O3l z##_Fp{Bf5FxPU)#mx(x?mu5^JL3gBl8j|0eG7=YYK;A^*?|D!6!Nu`~ z7iWz5^_v)e;l)wI#?u{p&BgC|`~Lbp47>2s*pYtty{U!2H)_(aix@GvXfa{TNT%>$ z?6hB3FxB4%fIj*{Qm4cP!s>R_x-cc{(dx+A9>inKleX<7YKkcySo1T z?Z2PrA3xK-pXdKPvi|sK{`E6|=jU%3{&#-<{3NLVtM#_-xapHfXM>3rom-r*O+a0n zP4}Ws6ffEm-FDJcg=Q^jG8dC}a}`~pLM&-!uDz6uxU6Z(>H0KhT+wF`Xs!Lnlv7-erSO>iy4#ACSfwChNDp#iJz%x zlSmuHQKRrTOzPxMv@V#|snLb4$z)%DG(e;c;;6CE%)}M^2@Pg=(rTfRnm%bbI-t?s zmhaIcOdf&r6ABgRNvMg13evZAv=}$4P%+(yv?FMgFj+t`rBFedwT>1O#}>9o!^RT@ zCKGfgq+k>_X+ImJk$%4}p*JbKM0&HIm&Q!}bqVjI@Y1C5GtkR;H{3uP1PfpEhlMQ? z-a_H!X@1_gJgTtOnS{2m@FGTEc#-s>KU<_}=mrZfVc3P2M);5YMVyH)u<#PbOD8p@+A+$nm@bi+a7v+-v|CS&#CC3{0lp%KU)&|xmYv-4Od~8 zRAf!4@Cr5gS1*J{v2fJGru}M?rvJKt{*YWi!TNh~=no5Me=jEdjE(=-umAs_D4aIX zrOf}`m#|>`#$S4&{8|#FZ=0qYioQ}~E;jhrb49QJr(%0E%zQ_OaasAty{rS84_s^*=7F$RU;_E*JO$@&A+E&}< zeo{*3qN_Xio$8V=yta__oJ87EbDQ)p+4vhy7G0m2IThbO{al~n$u~Oq!VBvXTw#;K z@l!^Ub)HFuUw9QR)bOJU*LgW-1 zJY9`1Y?%tb9>coI&p)9eUs&O*F|uYu`9b9~9N)%~bsGvxEaXpou|*mc@)g-+{7l7K z&-l^B-{X)h7-9Ja0Q&5r53F8)azEbT7<8~ARwzn9q}j% zSBjc;!|DvCfBe1h%e}%2WPzyg0;Yd*p>S=88jhlZFY$g|!aE@M7&D@CXKFOLLKcP! ze+i=|SA2bNWgM;`nAhM087#RO>CcW+6UhbN!V5@&8Teb#6!;krQ2#za{|6_i34fjo zBu@VrbwvP$hlMY{6_$UVDKfl&{00fue=7dMQIVSz9mqd!_}g3m&nJrh`p;v`kbhbM z{!v$oz>D1fpQHZQ>%Wzhe=7dMQIVVcJp6B;`17Y!^kzkmfA^q@uzvpz|M8&yt)gfo z{?~7_$Suer8s)-Q{L*iS|J8|~PwDr{qPO{X$N#uJSzja*a;aahzHR<_{PX-@IrG~c zs4+pm?)2M4|J!|kQ;@s=@s35W{{Gl^H@U-qekIe+e^&jKJ7IFiH^G06`q%FFb6lj= z|4(-%EyMrrUHp8W|GRtqbCkb5&!6V)zw>VYwVuYM*LFOBb$d@M^-V=b!{ zezR1XA5GR(sBySL`j*lb!)IV^#eXbWOc^%{i!kIHAwROdVK;UpVPGWame!ZBiDPWu zB^^pARdgRyxTU5dG|x!8ltNmiT-q{!U~L?nExPNf3O^xobm^Xr)cT9oWd;=s368t? 
zw7$Q0qId1?la=DX?YdfEW@kQ2+*W&B%Y<$lHKw@0wc><@4cI32x74k)?)ST{{#GhNQ{)dDA$L1^b zzu0_bSvcHPM`@dl_}L_9q!cUKjb4)8U2H~%ToGeDLe|p?_lhO5I0`OCJ2*X`u(TsZ zdFbj^vcdSSGgwAar15IG7dX&9W`O~dgidto;M`d-K z+o(h>iaOn;9;Hvce|`Xla>cqHd8e#WKU^tlAXu7= z0ehmc)K>{C>%1qAb&eD@60Dfed5^5!C~7rWdZ!A*jQDS#YzE6#JMZGcIuvystiaR< zBd>B4bswzA(A~{XLX!rTXH^$4tl)Q1*bN+3MJN(|j|%^8}PUj^80dJ9&5sO)Qk<_lP+sET)&K+_Pce5agjpKcU2 z2dtzFA&{<3Q60cac#(D_u^mN)f~5^waCjCp$H2;Rs??zpwh1PI6{lUWBexMneE`eW z{MgO848o!;o>7?5sv$JI3!ZyarD@-xaRSTEIX5T`krxD(wOQX_R$Yqf0an(F^g%9& zhc#gN1ufLSh2|nyw$8SXBVc{*fR&pQrgr+P<{PfKe#tVMS-O?4?6z_pRFD&+w6=21u?S&tk|GMAICv+7c39w%5!)d zib@BIcU_eExH&~xgXM2d4Y033QB}YS2;0}a3N*FB(neQ~e+7*vSOLzPoH{TRwGAvk z^Wc%=8Y1?<^2pANZiIM<2P-+q(b5x|OtAc&t5jWpt)Z{L(zq^Oq(fd*2P@8OuDKbh z)c`D0nCZm|XqtoN7hR>F4>S^3Cg<2Y2Q4T{0V~#Qu*htT*aOQy`>E4b#7rVsRIt;w zBxqiP73W;_$fwE_^%X21*Cn;Dx2C8@V5OMTPE^ME)?k^#l)#e6opxZwMpu1phGr~S z1+6wuZH~M+0amhE+$uIh?12@R{p?Ov#KTpv9D<$QU7`67R*G}AL6fUdRB;h;>$>z* zSLB5=SbSQ#v;rFxT*0yqR~sybrW;tv(bXacK{Ep^`_@~^#}%ij%V1GylG_~z#2#2F z+0Uo0LOeVG%PUxuei)jPt#MCH^@8jgs6t>Rxi0fcbfKtDU}@4azLvoG-e76NkKR zu)NZS2AoG-+5wg(XKk(5G)27!D>1l@-5zM_f#svA>GTLS#{n#Rx1fzm2Z|a3R!rK{ zHeZpOK48U$AFy}{jUQP4t7`h5fo3CE39Yy1mqlK@0V^`it7b{kcV__dqCHr8x8U}TkQd{?N=kcne=0t*X<%gqAKW?$nz>-bt+KOf z2hAR^vRdzm2rf@iKfp>%8+PIl>QW+DF*)mprn9J-VA+MV8?qjnj$kEeYE8Y0n$sPu zK(~-bTihwiAFP73=l*#OC~6^CwjqbyQlSY3D`i!!J^P_a0L#{KXR~6+3o8NRPxF=> z+9USBO3K;reK6vo8d#no?Y~ukrYBehn%d>NATI>465K*(S4Cbd0n0u;tI{acq6n}& zLK4yhXrjU5SJ&>|6q<8jc{s*hosBuGGFW!$!@tI%EyLsum&`#V8uD^F6oOos})!Q=_A{$MO}IamOeN3 z;aSvbcd*PM+6VKXnF3a!>lviz8rBsktjXP>arh#fKh~+CXy|tfbZVTF-{&Gg!Qn+-(T*q8C`^ z^wItys7s~6O32+(`5qZiD5O`tpVgYSCk`d_qsGYYfd(u0C&S3jvC78Akp%cy$Ri^0bP zmPg+94>b`Fi@{0`?fm{eH2cAdbZS(xB=X`USf1L>={=AaFTqMo%`ej!wdf;QNui1N zt3mSvtiY&;CJCBmU?n-3&S{Ys3&4uas5bmQ>XJKH{&_oA#~>b-gGGgPS=A4kQ(z@J zHQv1lHRlRg0opDL&Xz(C16F?O>%CjG6lGBk&nWbi|7vKg+TgiIJ?e88nzmpSI4L6z zH>0R&6iM@B(PHQ;%)^Y z9`=CcALMb<9h!$=**Q1Onu?n93@o$OHo#VIc#+(%gmP2Mut-BRs4=lUvU6UEaLjqWFLER=DgC+|sPv>UK@=$Z$ zf@SU6ZAdfZMQyMG%jP@{DI?EV0lERHqk=k4OXD@e%Ct4ivwW!o9$AM zqAtw<%QJg-+1ZGPb6}+eb+>ke<~>*e&duwOLd~Hh#I0-hZ#ycXhXKoEe&1+mcZ$-0 zl@NCB-3(~7VEIRzFDF1V5v&B~1Lk#@vrdB*XRbA`HRh?sU>v z2j^EnUR(z&#az3(5p`)fSf*^b&v?W`Dp;D}p1tcsQwA)vbNbEGs5w=^igoSTHl`YS z7_h9<;_jv6?e+)DHr#*nZDmy%2bVC2H$N1CguZciiItP|tj_Go5#6x+o3W7Z^ z@yH!_uwpgOdT&R~=>wLRo9Cff_0hwCm7FH`KUadHmVuQLKBxL2Xd=LhiFvjp6q@5; z<+nb%c4Alb|6nDhHPG;Oh&`}kbCj_cP^<00vJdGyW&t#wU?ppw=T%2uh+sv!_0_$# zM-KxQm2P~~9<^u{SbW6XW6hyi50-h=^SYMEol9WZIUaM#>W2Owtb#QAdjY6RsbD4N zsKvF2ho)fZL;CTbp^?C%T(Ux=P;TG%n@7A1jY4q2cn2Td|qv8!`ppF{HH$m)y6`6Z*No~Z#FtBVxxwl=RSpim}OD;bXHD?1@F&%Z2dLu6` zgT<$+rPjcI-2uxwbm6^VXdZ)=ygIj|7c?d7h611=7QSrAdvK%i?@GSlZA<(~BW@vcaOF z@}8tXV+)qnX^M9Po;Y1j`}n<=no|)C0@UX(}CSMNu=s^2liJ za20i_4On)0hrL64V_gWWxX=>G3N(ko@^O0gG#xePELis1LA?{&p@#u0CiOrzgY#d( ziVa<|{~eAk+TyuKy{dKznwDV2I!%kUMP4ig%Rj^6dS}d2UBU9q8|UYTc!&ZkCA8$k zM$nuGE5<2*u>pB;11!CEaGnG5;s;nssRx5**MYY!k7pFRw7)MjWx3SaBIGCY(TB8UR*6-uN;vEKoDS;=`;;Z-wR-SV>N=>)b}oc>-3T))2bO z6+KKhe4eR??BC(>R0N9(TlVHDG&R6VjCwuzC^S96qMTa1R;{Sa#8GV`OMXf#u~qv)UKr-4U=f znXP-+W1gA~mi5btx&06im%xe%E0be|-1!Wa-uWHX0eN9L1aa#+WK4PFMJuoZ%||SU zpcb_UD>5wjdLL*ygXJ0h&KX}KQ&Yf-be?r|3g)Z}VEJS^X2+r~1%jo|_6xaO8Z{Fv zbC7lL5@nHyuRHDF>E^>rg(l2YMK=63j=p-G%?^4pw4V$b=+l`hgV?{r>I- zXac}Wbe=sp7kPIZte8xvpvs-GCJ9zx_M}Fg5D%GPSqGPG_#K)GU?n(z=+GQ_Q4=hG zSFfEG$ctWJWtoq;_dzWZz{(E`E#(T$P_RtVAEs4=CJd~6=Q*E7V9rVd%WQVGUxB)` z1+0YZ$rq2IR=);I8(c1FIyAMw%5whrcqeL36R_f3%N@v~(Zhgcn|3_?0{qtqu%g+p%#C?m8~s06)@hmxCAwox60EH3DL#&fhwotd1(zTF3YunM z*=h>r*dZ@mz)Eo~KVTU0!Ursmv=a-os6~^&@(d5}(Gr@OU|GlPsyzjoIIui5^PF`E 
z{V!l?(_BU^LR~rrmTk^dZZBe{qz3sOT!Hn0rY%?=nom{NpyqS|i+8K=_CAXq1}y)y zlXi#Uzvh7z5FU}e8=9qHX=8Su%7NwpSTU{V-@VoV{r@10KP_j{C7x`7S`-47DLgW$4m2yl@{8Hy zH3FK`U?sHe4;1{Bx#v-h_Cl2No4lvC|-E27(o*`7$UNHOC7q zAGeAvE|)|P16E4fsUf@Jzhb~LhcCBZ4^1psu`%+GN6=gctDyCQ3W-h7|AUpBmZ#_~ z5qn_8<;=*egm`EPmP1ITj2qB+gO#HBTEHMLe8GxwtCZ}HyjTwwpMLsVZPcReU|C13 zxKIHaBUs5X#*G7^c?6cdi2%|x(x zmv7OtQFCU2mE>kS@t8Gw7_c)eD2KJ#UbqpR%A%!T4$h{1D3|+yVE<=oMm7YxK%Fgh`cxmmQVWG zwiR&xBv^hCt1Q{Bs1IOytlsyi3p5|V3Upljx*_tS8(3cH`QfKgmr7Q~J#%NBT7Y;M z1Xf~5mBhBt1cBw_^5f2U)SOjd*}GRU8g093o%#XMyTmQU_%ug?upGr`IasXBB&G*Mv1xKPucqULM{ zOYdH_XMN;FGFVCJ=Vw~t{C%*pLZaM1;y4YgxYbG(4K&tZWjQWwUKM#U9IV9jH&^0O zm+FBPlRM|fWW>W1u=n6wnbM}H2=w2;nV-@r;U=^fau$|Wn-WDv| z(CGAu(7XgIWwmIhar?!5LR5D#Hs1%%dUR|A^kVA;3XpLhf{=OS2?wnoFqn&@G` z@=Kj{?g9K4T>;N1w0p!AXo|PPbC25paWgc|VEH=*SuN3`{|C!6obVtsn&K}VpwWu^$aiKk$b%v%Q zSUyn)qN+gC39LA$;62~m(Eo!KknzDI3Uz4^So*wx*#i*|JHaxC+RZA1+_?u1E>#$mt1EI+TE7|GLhPud$e6S+5wb;+~(Zhg6ndffl zgj!?|79ZAYyc0AIU?oKzx?K{vGXyN&IrPQLuIT^4GG`PlU52_e1+0X;g@?N!9!`O! z4Xd3%A$MMbML8$5+kl$$4Xi|M?d_A17mdMkFwb*6UJBk8EKOMNlKY@(4^}}`!j#3( zi~~#K92PhlbJhv4tTR8=qtN38fR&ZENPiNw`Wje%VReL=(0m8W!TIn*8{|cap@>^; zolZH8(Zhh{Wu9+#L@jay%O|YQ?gr3w2g^SCaJg^L_=Dx+9Nxg(6a7C}+RV>q{W0_~ zuxwv0{^*Q&cnDT(Slth=p(%-Z)64nDtZ>wvN?_T!*3B4>yyy&8q&Z;zUihy*V8w)a zc8i5Z081ZzM41N760l;NwT@>gtZ$YrG$X(Yj6Ul55}FlYB{{p^Zil>h z23Bn5S7t2cshwc?zg+6r5b^LHEGqoomfq0R2P@I}Si*eNoEBgOxYn;V5P2~atbFsr z^1J?njtT!!5IJ3?^*-qsZ(I(X9rHHf_NwcmVfYlb4O^jVA*L-eDFif=>eA6 z)qZFfKCXvda$hB8h$^~20aW|0clIt7RUKWumZye zj`)J(7_dBIPF_id<~&$|n$Dgzkr!3K@=v4Y_ryH)5G>D}wste{b{m0}68x}63up#| z6`)CM?Twl<5-hD-qYci;ik?gXJHSIIbHsx4=r!ba@qx zIjaF!acOjobEr#MUVn0GJUYG$n$cjHG^cJoK+Ty9mY-YWcBYQ#VZbt{ zElc@~+}sUTR=BVw7aA3;xR_HVPD5h`i*oE5ZjHQX4OU7TbG$v~srO)+a@r5^K|FK< zOB3=~tPRanu*{m%llvnt=7SaM*5si*^1=v~b$Z~8DX3S6z_N|dyN-nBBv>gir+2l1 zCJ!tJM~|jKn6q5L;?s+X2T_-pD!6A(hi_&MH4`kKkSAZ(LbCuYYnL-+ub}3Hf|cyn zbk^oh=wZOprUzBbwTHI_%Ohft`7tzCz~a}O@rZ}!Ggw}Z-LBBci{4;q(k;F;!#q_6 zEbH8k>jxnohJY0llDf7$G~r-rUCx?1AurZ|MY%VtTm^Y?0W81t-~%I2i*A7BA2E2o z4x0O5Y1W*5-WVFr9(BO6`?y({vkYMQq+71tsYUF8rOnm0y=jG-306{w*(DU3bzu3q zoa=KOHD?D{4(`npR&+xT16FK$$biR9;cdZ+i!ju<4$U*LeAb*>x(%8NU?n>CDDf6~ z=L1$uI_qNRjx|ZJ{Bm6%bVoe+gH;fcmSTb2i32Ov<@`%W09eT`7sB?U<|Kj@>E7bQqTc9Xz@jq3R$PPsDgeta6#p|8XsGsh?yLP< zp$(y$gJtK`%Olec{XbX*>HLhcn5W#qO3rn+ZHIVR0hT^Aqv8i>PJ>0YNovvnd2to2 zME8thKFEu&U^!%jx9EymWNCxk3?1@C15GKgtfOY$oC!@kusof5KNpb~5n$P86t5VJ zy5tEKmDlOee$?tsU(nfYl?V~!Ab~yI^a1p zkHGS3b7`zC^5QvIcAcKO4nkg31S>Kla-s&cs3usZP%oE8&@=$cFKV{+IB0ZWnVdZ9 z^+sOo0xK}%*y(5ZY{!A+mDlC{cErqKu(Cp*y&Ve83$WsxE*D>pn)41UAMG>qjpFEG zz)DG7URr_wY66xybXdw3Xj+368#QNN1~kLLGCTD>d%h9+f3OlVjt?7J46z4RWM0=* zH4qQy!Lki|z9I#h4`8J@U5P8MMIY#exYa(N-yM1302XgvVQh$6mT(71w? z95uJM2#p_D*3SKgcSK&C0V^xx1eIcm*aIsu&!bf|;^7up9${H6`a)9-^Cs_nwd+FE zoYG(=X|rmaDUEOgOJiQya|`@e7qGNp!z-?WrZ-qr^t`;=(98u(>)fA7z?_u~mTl(A zO`ber53KyWZa2#y9?W3*hrLL?2#qyZ8s}@zKBMMT1*<^&;&@x+MR%}#%&T&$;Cz3u z{K7^Yuuml<(u)LxJ8lQ(I8mw67 zf!bY|v$DbR&ph?mwG?6xEYFuctZBrS_^sc5v;h(({qlXE*%Cd;APK! 
zGZ7D58$6@%Tw^O}oWV+RzHw#*YK|LNfv&l0x7(tJ0jt0qowT?Iye(K%_~;eWp;-u4 zV)Vi>hoDiwqBO$1wJzxY4H$psnX0WYPn`$L^s-m)H%(A8!P121^^~FM1Xh9bq8lE_ z3s0~TT=QINATQ>DWuF!^`Wtd{8CbUApYHfU<5wKdJ$e!E0L=-o95njs<&hUW)aqtnnSx&h zZ-8b9SbEKpGHopJ9RygmZm(vPL0-gw6`0ne!eG>*EnxYFe<|+`O)yxVF-yACf+hv5 zNX=k_1@fXXSOID03)Z79<$Is4$f~_5t?ye1!|V=RZ(+hfaT$qUoo&Y zdKj=0(t5_z?9R#AE5BaRj{(sdiCppT67Gol<;qT`$Mx0 zEK|&~#aP;=-hh>_5!bx$jQ$@ib6S#9IO-Bt75B{PH>MZjVIWxAkT;__7LCGSYvty%O`?rejJ+1U}@I`51Ru` zWw2r#hfW`gyciFbU;5?p1^8^6f#s1qu*)GW)`h@I4td{c5;QVc{w^UGOCc{3z|y$C zZ;{aqJq%cJ>3y#?MJ>t)D<*=z<_Jv+Sbl3lsQ1v+2P?_3>|%H1#VoL5)32z0s7r0Z z^3T;hZH{;d0*eaW*>4~;N5G163H4cunsW{;ANLQ*2IR$Cuu{_d`R=fRw*@OHg7Mu8 z%~P;q*Mx5Jg{Bo)1&-zF94m%=2P--KYTp<5Y(2n=%M}*KA!ee$atMukeHEGuV5PW( zaX(OVZh{r#{&C`M(!=z`=UHUGZb3vbs|LuOH(+J04(sp?8ZB5w_v=y_*YbxoMeKo< zlB=&%3-PcOtfKpM`8MG@W}xPzf|cZ6P`VfLq7+y~_v_k6TI|^b%@43_qrx8#fu~bv11wI_IP2gZb^e|u*-LLBxYT4Zu zxnorV&pj$)&OK-bgH?3Du2)8Kv;LT;CW58O8$7HW;^8=0MfdCaw2930LCyIDmVKws zJv$&T8h}-Fziwm_CZeqsG@trVF z1%VZlSE}?x#6t>LMfd9_wOLVnFha9DSh3o#!OgYkVZbW7UzZBw8!UvT6<8@zD+akj zvjD83`*rg(Zk0Tax)cRgQeJ7}b4$c6Sijq^>%8*RP1GDau#&ak8pgSyhXJeTeqD#K z;^#j@qXmnPURiJ$nsBg+?$@=;yuI5I^VC+b3i8VI^x_e>VEt~tuJbCd9>|NPU{S8$ zlk7U9hXD)!9s$|vz8*jMmkY9g_n+0eB-KT>QR~Sj_x-fxyQ9ryvN``d+3H@AF1pj# zE3CxGDY$ztj8YT5D!L`^z7C@-y3^M)^G>(DsAwjP(*C7&PD%+xJVyDuoxaYif1E+J zYJ*Waxc(TurYpK1jI!uX-^j3%7P-(22FoXUwbLGG;=n4p(>EaVZdQHFaL2*YzbqTv zw-h42EvkIcoxaXd8`~kHx`O59N^#|oQPaRGy3;o?%xdchXhwq-6CHJ@4m1bAD!S9x zlzDI2CU>l6f))6(T*FJK@|D5*-A-TU==KLttpVEt~VuV&5c z9jI0lz{+>s-|snt)ex|X?)3ExFB5PLnsBh}W4hP+1kGcxithB)K7BBvIJ(2vVA;O1 zVOyg^(t=fVr>~c0ZIznHsM%oIxgB^t)Do*9U=`iz`~Q*l-a%2bZ{IhFnk?Op$!^46 zDR!EQii*1y?5LZF9qT3p6*~$jmrAi?C3ftPpkl|8Y}SgMVnLeN5zLDHIk?|>?};TKB%|PZIu+*k6U0Ptb8YbEwUdk;FoTCtzm^U0a$9&H``}-s4MivR$xW0H2-8^eum#nEnzbT2o+UzjrI%15Tm+E(m&|_RCj>7 z1(w?ME%e=AzXWDZF|fG)m*2L6zNil@wdre3=1D4-Tgxz*3vO-pRLGRY0a5u+W{O z$DM}EBw(pcU!UTwnvSqKj0BeJ$7TWVU{+rTmfG|U^c!E;n_Kli{DG|U5**3=77BX{zr8a%Ti?^j6fVs2^Sb-ni3#Pyd={~U3rZ4R`mO5Vt z?p{oR_arD=N_2QG`y9gfTcEl(S&>4L&yvO zR^hhLM;qaP-V7|Y>04dABhCp<t9hlWtV5v=C8~^eCEuk;E04qD_#)0a# za2f(Ewdw1VaKDcMG9!WIx_x|vJ@gI-EVb#|u4HGcH87XX0L$i6t8oKhg=7~1vsz{P zdizhv-vu+L53ta_H~TTr7xRFnHhsera$eqm%p_n1Zl6#u1~Mmsr8a#-OLpD(1Mls1 zV0nLPZM_V$+7(z?YSXvp#KjGvFNOfir?2?>Z70|r0ZVQArX)OA9srrSz@oQLO#ca) z3&2vFzVRiyr!0cGbRSq@pW1Bg(HBmXfc1~*+tcWl05fMiu)_OF(Nmx=)&fgy`pO9p zwPzr+7+BnPeX~mT*jB)M`y5!wpW1di-5*Yrfc1~*+mk$32l`?Luu}S( zn-#zsW(%;?rf+q^ql+GpSp%$`?c}Eqka-L&wdq@4vX=|*2(<^S>`(0;wH*j2O2GQZ z^!1^}u7H`t0!!{IUl|2`u@6{k)3;sX;|Yf$vl&=6DOAFA$h-!Y+VpiRP3`j<=8_dy z6`$HiaKSJdfc1~*>oZBQ!pw;VR&`$sc?Z@or+}q4eM1wUd?%oHIAD3FOzQL!GM|8@ zHhqIjxtHU;q4spp3!gjGYYOX}5y1M#^z{kbVuG2I0IYWXZv6^^zPJc1wdot5_%vlZ zWKIGrEG6vrSjhYYmfG}+R3;DpvrJ%R^}CyQDiEX_V5v=Cx1_uUw$M9|fmM+*H7y%5 zEr6vqebKUm69V9Lz9+D%-@p1fqb}4fu+*k+JKt%pBcU&@0js>{J71wtx4=@HzM;O;Pv3%>a}QW< z{qHy72Eg4BV5v>tsH6g+5;C8F6|!S`Sq5YRfTcElql*rwc);oW2w;VNcl_YP zYSTB~cgC>(&=&>33htkC)(!eXGYsBOmFb(7^nBzT$ovFW^o|*E9UwCpSZdQZz352K z!!Va9V8wqZ%FS_tx&@Zn^v&{}`SBvmoN{1A^?xvCqaN;#084H9<|e)PQVbc?5#Ey> zGg}>i%vfNlP2a+zw5J4|&d&i>)`#LngJGR>0a$9&x7;^;LpSIP1z2hQAAZ$9U$g|4 z+Vn+}UnUzN(+F6FJHl_^keLcBwdw0xe00Hfm`hQ>D*sT@_&UsLGqBXAubbbjjwfN} z)S3!?*FUz_CmXoa4=lCm>y!M->j`990LyhJ_TVpM7+|SQ-@xKy4P6JrX(q7TK9-*G zgLTehV5v>tV87Y7T0>vB01FL@+cyLHqBpSArf+z1;k}`d@d8%hPMnT{%nD$sO<%hB zxDXF>X%n!5KbDOaU{=2dmfG}<@|!bjFU%Y*uzZ5n1iq>Rclv>)Hhoi)U(b^u696oF zr`F{QWYz;qZTfP>Cq~*1gVRi4MSXlz)e+V?pMa${ebfBrqD`PL+5;;*XziHGy? 
z#mD^p-2^i4fTcElQUCb^QRs{Qz>3PhRWuU*Zx02Q+Vl-fDA9XEW-73vJ!aoq0Vl2J zfTcEly-ULOK83kt23A&#&7Ne)d;^x+^!4#Kl}5wN2?17G{_Rz_VI4IASZdRkPAGkw z1{oSy=^k_DeS;di2`shg8&)#8*KiHg9~f+VtfT%Ho?t?<@yap~qZD8ctvz084H9CYMZkZicz^8dz?zwF*{2rXH}= zrf-Tr(=q~P&TL?z1$VDrf;Cbku+*k+PQshEJ0Y_USgxM)j+aA?y#$up^vy1rx-<~# z;S;ceW9^pwfD8ewe@x$=5!c_r%vl7ikZ1R(?l6m10ZVQA+9Z}s^`UpR11r#TzJ4;C zz`h5T+Vrg`nTDN*x%3lQQL(j~vXJQrtba^jA9nIom^rb)ihg$g_eoeIB?3!r`g$k6 zoxUD22Z2RVP287yF*|20!wZBx|Yp;yaDQAFtAeM>>mw;%wk}vOm6*6mpr8a%Td^@(sp)U%7m6Q8)?2mr1{s)%Y^o>s1=g}WBHcl{eycYIr1AQ?H zSZdQZzG!a!6;KaTf#tf|v7QfPHUmp-`X>8!k~hQk`XjJx@}4OXFt4n@Qk%Z%N&D|y zgp3ogD!dlG-V1kq!hoeVeY1+@Wmwz5%mh~8YNzyc$Z)_?o4(n;9y1-GFMa^aJ1=+3 z`$4e&_k^FRGJOk^4$SEW84OtN-civ_pfBbDOKtj=7tIebK|L%77QNbeczej41eV(L zt?>1%8wb~GWEy-%d3jx@!Mt(>mfG}nO+ILU5;ARp72>_PSu))9SpqDz>FZW(`uwIN z%uHZ$t6e_rgUkhBsZC#ZKd(c-o5I~tU?u0}KYTs}*8jj#o4$d`hmN*}-suLcXzwLg zQ0R-bz*3vO!Nqi97}UddVCAfKO~4>y2A10N4e|31S_pfq=D^C%+fbMdCD0LAYSWia zJ{-IkGJSz1cUnrWg1bIjfTcElqly`iJa3qpz_N*{*J(3k9s^5l`bPV8F8)No-B4gv z^*^{Hht5IBkqiVddL8lcTD};4$wQV zfu%Nm)BU=vp4lAkh62kyKat?;z{~`e+VssyPFs@%nTfz^=dpZO1oXuzV5v>t++udl zt?qC~8CYR44Q9te<`b~grf;EN*XD&S;ch6fLh_R?cYs+m5?E@}*Jk6<)+qE&IIu!J zR`|USg1slO)TS?55^3Kb>ftu9l4BZbenaLbu+*lntADrbQ0R*>z>3aKHXg8tnF%bl z>Fd4m*v)9jEC5!#$I85^&=)eW)TXab$%3O7{ou|Vu(D$sr7eIA3fE1w=^NG>Of;xLQm0!wZBMkO4d`W-T>ftBSE9rb)L>^*^{HhsfO76x~NdME`}MNH#C zpCHo+SZdRk_V4j$F!Y59ScUnUc5ZQjnF%bl>6?~tqE-ZCl7Ln2v8u@!=!+s?sZHOM zl0|P$K+RMF%RRP9*$l|E0G8VH<@|f@x$XgXLxJU5u-VrMX3-j8sZHP9gp>Q;L1s6w z+&sIshD#hgT>)Od%Jh{>qSmyAdiV>hkXTCt9Dknxf9Q+4zhKD7h%g?%8rewFFlu5`)G#!wGU zft4P6yK?|!^uYSZ^!4#u(h2(F0I;&2ZEL&;=G94HsZHPT#Pk)TA#(#*X`Vf59YDY` z2bS9O4J}yfc%O`M)BkVnar8a%DN|*189tL|HUs zpCll&0$Bf;zP`P!&d?XnfEAUybJKX3SFeGkHhtSAW&Y>^nQy=f_wrj22YV|6u+*ln zTbcK;7qww#0*i~w8L}NR>w)!;>Fe8P>)O6>Yz?fm++Cd?Lb-nemfG|UO**sVG-PT! z!_4vW*ZqODbSSXYrf+ar=Z|Bc9>Rf@6Zhb~3-nG3u>LW9eFMAMKwo?TR&MU@96ijU zpTJU^zVS(CecC~$9t6yLs& zq6We}4q%~qdrUVvz|8c5pQ$o^vy#p|;~+x-tJ@>Fbt! zzR@Pg_y8-^yU&h7SW9mJmfG}nFYZ3e2kK!ru+mmPnOO;$JHS$#zU};i(u;<|Jq}={ z11+OKtiFCtt|4hu#?gta$H0pYhNaJAkD&eM5?SpczmzM}d{Q`f1%ckjVp9 zy2|to^|M}9C0ZVQA#`}FU_JO_#1Xgw4{&fV*qQSsYo4#qu^Cr)POc=1rz56ZH z!$&0mOKtk57yAs{4>fZGSUxeiK}N__0ZVQAX8BeBJXaUaG=SAE|3LfgaB&(7EVb#I zn>^p95HjVP0>;kiBDzMb1FS^l`8VZ>uzzXvS%Jhf6xC<<`>FZkJ7rPni;RUc#V)9oHg^Uxh z)TXbS{}11z4sfOctoZyxGvi_ZMFUH1`uc37dq058T3{u63>cN!2`qD9sZHO&691Nt zP!I2bCC3yrmmq@yOKtiF`~S>s4t)^`tgQUQb!}l5EeDp`^bOz0yci6bEx^k57+Bc} z`r-|+)TS?85^y~h>fsx(s$-sK_l8VcV5v>tDF0uJcEa9j6|lwjRWP2a)<7WaqDDPXyK2743G7q)%j^{Y(ZoRZ!)3!xtB0V_22S8^t|AD18eFGC0jPC>)8CcPt z`uQ%<7dWugrmuHtVDFhw4;_J(6SH#fq(x&kb< z=}RXrtUd^tC%{VgG&sfkgJljZwdosH+V{mPPnem&Dvy0#umdvT!1~AZ^|9Toz~1UQ zuqvLBUV$)+?f^?|`f`bjw(FpGihxzGNqxvH z0MNV7`I1nszV5v>t?9%@D zgRZd00akEaantpXSq-dzOkW?n2W8FSOaoXUxs&E~hFMewEVb!tleGAWGxW|cU3Zdb3BH^%mkL& z^z}|!61f&K8W)&3Uc>*J4Sit*mfG}nFB>rax)1DefRz?kI&K+cb_44l)7Q6-OKuxD z(*Ra_?&RYwU>4bU!_QQizF|pA8`wbaGzJ#u71FMF5d5|wu+*k+NZG(@Kd6Vrz{-s) zvwnrlQDFUJ`uf&A6#{)>Hv>MS+$s89PB1fpr8a$&la}#OkZB359Ip}PiO?4dfu%Nm zqssPYWZTc3L4SsSEYK8+=_?n&lZb9Y&u+*lnjh|-y z1=w4402ZA$y=@(sMcsgY@SLtpF#mfG}neWQ=mK|P!VR{H8) zulqsfC9u?{ueYClhpx~UJ%PpL&5)MESx{eKsZHOI={kQoQ8WbeBC(K=w6150iC zh87zdY=e5Z0Ib5*yQ42c<~^{~rf-;^gXK8vt%87+lQ(m^71of$fTcElqmx(NeFT|l zz{>VUMznyw$O4wy^o=hb!aGAfn1SUQv!_`%$W#MMZTcqrIfl1_z8C>4oBZ(FCYVpgeY1*(j#vxz@EBNuF?+9^fJ_~@ZmLb+Y(FPt z5A3ZdV0q`yI`9VeU*W(~o4$p~F%BYRRshSrvnHtl^uAkz$3YG2pg-(^4|?5(1Jm7G84b#6mU5z2r30P`hH^kqyq%QPD9I&$U=SGi! 
[Omitted: GIT binary patch literal data for the new Parquet test file used by tests/queries/0_stateless/02998_native_parquet_reader.sh. The base85-encoded payload is not human-readable, and the head of the new test script was lost in the same garbled span. The script's embedded C++ code, which documents how the test data was produced, continues below.]

+# #include 
+# #include 
+# #include 
+# #include 
+# #include 
+# #include 
+# #include 
+# #include 
+# #include 
+# #include 
+# #include 
+#
+# namespace
+# {
+#
+# using namespace DB;
+#
+# const UInt32 ROW_NUM = 2000;
+# const UInt32 MIN_STRING_LEN = 3;
+# const UInt32 MAX_STRING_LEN = 5;
+#
+# const UInt32 PLAIN_ENCODING_CARDINALITY = ROW_NUM * 2;
+# const UInt32 MIX_ENCODING_CARDINALITY = 800;
+# const UInt32 DICT_ENCODING_CARDINALITY = 20;
+#
+# UInt16 nextNum()
+# {
+#     static UInt16 idx = 0;
+#     static UInt16 nums[] = {0, 21845, 43690};
+#     static size_t nums_len = sizeof(nums) / sizeof(nums[0]);
+#     return nums[(idx++) % nums_len]++;
+# }
+#
+# template
+# void generateValues(MutableColumnPtr & col, size_t num)
+# {
+#     using FieldType = typename NumericDataType::FieldType;
+#
+#     const size_t next_num_bytes = sizeof(nextNum());
+#     char bytewise_val[sizeof(FieldType)];
+#
+#     while (col->size() < num)
+#     {
+#         for (auto bytes = 0; bytes < sizeof(FieldType); bytes += next_num_bytes)
+#         {
+#             auto tmp = nextNum();
+#             memcpy(bytewise_val + bytes, &tmp, std::min(next_num_bytes, sizeof(FieldType) - bytes));
+#         }
+#         if (is_decimal)
+#         {
+#             // clear the highest bits to make sure the result does not exceed the limits of the decimal type
+#             if (bytewise_val[sizeof(FieldType) - 1] > 0)
+#                 bytewise_val[sizeof(FieldType) - 1] &= 0x0f;
+#             else
+#                 bytewise_val[sizeof(FieldType) - 1] |= 0xf0;
+#         }
+#         FieldType val;
+#         memcpy(&val, &bytewise_val, sizeof(FieldType));
+#         col->insert(val);
+#     }
+# }
+#
+# template <>
+# void generateValues(MutableColumnPtr & col, size_t num)
+# {
+#     std::string str;
+#     while (col->size() < num)
+#     {
+#         auto len = MIN_STRING_LEN + nextNum() % (MAX_STRING_LEN - MIN_STRING_LEN);
+#         str.clear();
+#         for (size_t i = 0; i < len; i++)
+#         {
+#             str.push_back('a' + nextNum() % ('z' - 'a'));
+#         }
+#         col->insert(str);
+#     }
+# }
+#
+# template
+# ColumnWithTypeAndName generateColumn(
+#     std::shared_ptr ch_type,
+#     size_t cardinality,
+#     const std::string & col_name,
+#     const std::set & null_indice)
+# {
+#     DataTypePtr col_type = ch_type;
+#     if (!null_indice.empty())
+#     {
+#         col_type = std::make_shared(ch_type);
+#     }
+#
+#     auto values = ch_type->createColumn();
+#     values->reserve(cardinality);
+#     generateValues(values, cardinality);
+#
+#     auto col = col_type->createColumn();
+#     col->reserve(ROW_NUM);
+#     for (size_t i = 0; i < ROW_NUM; i++)
+#     {
+#         if (!null_indice.empty() && null_indice.contains(i))
+#         {
+#             col->insert(Null());
+#         }
+#         else
+#         {
+#             col->insert(values->operator[](nextNum() % cardinality));
+#         }
+#     }
+#     return {std::move(col), col_type, col_name};
+# }
+#
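For background on the null placement used by generateBlock below: Parquet stores nullability as definition levels in an RLE/bit-packed hybrid encoding, and bit-packed groups always cover a multiple of 8 values, which is why spacing nulls closely forces one wide bit-packed group instead of cheap RLE runs. A rough, self-contained sketch of how that hybrid run header is read, as an illustration of the format rather than code from this patch:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Sketch of Parquet's RLE/bit-packed hybrid header, the encoding used
    // for definition levels. Illustrative only.
    struct Group
    {
        bool bit_packed;
        uint32_t value_count;
    };

    static Group decodeRunHeader(const uint8_t *& pos)
    {
        // The header is a ULEB128 varint; its lowest bit selects the run kind.
        uint32_t header = 0;
        int shift = 0;
        while (*pos & 0x80)
        {
            header |= uint32_t(*pos++ & 0x7F) << shift;
            shift += 7;
        }
        header |= uint32_t(*pos++) << shift;

        if (header & 1)
            return {true, (header >> 1) * 8};  // bit-packed groups hold a multiple of 8 values
        return {false, header >> 1};           // RLE run: one repeated value follows
    }

    int main()
    {
        std::vector<uint8_t> buf = {0x03};  // varint 3: lowest bit 1 -> bit-packed, 1 * 8 = 8 values
        const uint8_t * pos = buf.data();
        Group g = decodeRunHeader(pos);
        printf("bit_packed=%d values=%u\n", g.bit_packed, g.value_count);
        return 0;
    }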
+# Block generateBlock()
+# {
+#     ColumnsWithTypeAndName cols;
+#
+#     // test Int32 type
+#     std::set null_indice{512, 1001, 211, 392, 553, 1725};
+#     // Nullability is expressed by the definition level, encoded as bit-packed groups with a smallest
+#     // group size of 8 whenever null values appear. Here we make a big bit-packed group with more than 1000 values.
+#     for (size_t i = 0; i < 170; i++)
+#     {
+#         null_indice.emplace(622 + i * 6);
+#     }
+#     cols.emplace_back(generateColumn(
+#         std::make_shared(), PLAIN_ENCODING_CARDINALITY, "plain_encoding_i32", null_indice));
+#     null_indice = {917, 482, 283, 580, 1926, 1667, 1971};
+#     cols.emplace_back(generateColumn(
+#         std::make_shared(), DICT_ENCODING_CARDINALITY, "dict_encoding_i32", null_indice));
+#
+#     // test string type
+#     null_indice = {818, 928, 1958, 1141, 1553, 1407, 690, 1769};
+#     cols.emplace_back(generateColumn(
+#         std::make_shared(), PLAIN_ENCODING_CARDINALITY, "plain_encoding_str", null_indice));
+#     null_indice = {1441, 1747, 216, 1209, 89, 52, 536, 625};
+#     cols.emplace_back(generateColumn(
+#         std::make_shared(), MIX_ENCODING_CARDINALITY, "mix_encoding_str", null_indice));
+#     null_indice = {1478, 1862, 894, 1314, 1844, 243, 869, 551};
+#     cols.emplace_back(generateColumn(
+#         std::make_shared(), DICT_ENCODING_CARDINALITY, "dict_encoding_str", null_indice));
+#
+#     // test DateTime64 type
+#     auto dt_type = std::make_shared(ParquetRecordReader::default_datetime64_scale);
+#     null_indice = {1078, 112, 1981, 795, 371, 1176, 1526, 11};
+#     cols.emplace_back(generateColumn(dt_type, PLAIN_ENCODING_CARDINALITY, "plain_encoding_dt64", null_indice));
+#     null_indice = {1734, 1153, 1893, 1205, 644, 1670, 1482, 1479};
+#     cols.emplace_back(generateColumn(dt_type, DICT_ENCODING_CARDINALITY, "dict_encoding_dt64", null_indice));
+#
+#     // test Decimal128 type
+#     auto d128_type = std::make_shared(DecimalUtils::max_precision, 3);
+#     null_indice = {852, 1448, 1569, 896, 1866, 1655, 100, 418};
+#     cols.emplace_back(generateColumn(d128_type, PLAIN_ENCODING_CARDINALITY, "plain_encoding_decimal128", null_indice));
+#
+#     return {cols};
+# }
+#
+# void dumpBlock(const Block & block)
+# {
+#     WriteBufferFromFile output_buf("/tmp/ut-out.csv");
+#     auto out = getContext().context->getOutputFormat("CSVWithNames", output_buf, block);
+#     out->write(block);
+#     out->finalize();
+#     std::cerr << block.dumpStructure() << std::endl << std::endl;
+# }
+#
+# }
+#
+# EndOfCodes
+#
+# How to generate the parquet file:
+# 1. Use the C++ code above.
+#    Put it into src/Common/tests/gtest_main.cpp and add the following two lines to the main function:
+#        tryRegisterFormats();
+#        dumpBlock(generateBlock());
+# 2. Generate /tmp/ut-out.csv.
+#    After compiling, run any test, such as "./src/unit_tests_dbms --gtest_filter=IColumn.dumpStructure".
+# 3. Generate the parquet file with the following Spark SQL:
+#        create temporary view tv using csv options('path' '/tmp/ut-out.csv', 'header' 'true', 'nullValue' '\\N');
+#        insert overwrite directory "/tmp/test-parquet" using Parquet
+#        options('parquet.dictionary.page.size' '500')
+#        select /*+ COALESCE(1) */ cast(plain_encoding_i32 as int), cast(dict_encoding_i32 as int),
+#               plain_encoding_str, mix_encoding_str, dict_encoding_str,
+#               cast(plain_encoding_dt64 as timestamp), cast(dict_encoding_dt64 as timestamp),
+#               cast(plain_encoding_decimal128 as decimal(38, 3))
+#        from tv;
+#
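A note on the DateTime64(9) columns in the schema below: Spark writes these timestamps as INT96 values by default, a Julian day paired with nanoseconds within the day, which the native reader later converts through parquet::DecodeInt96Timestamp. The arithmetic reduces to the following sketch, in which the struct layout is an assumption made for illustration:

    #include <cstdint>
    #include <cstdio>

    // Illustrative conversion of a Parquet INT96 timestamp into a
    // DateTime64(9) tick count; the real decoding in the patch goes
    // through parquet::DecodeInt96Timestamp.
    struct Int96Timestamp
    {
        uint64_t nanoseconds_in_day;
        uint32_t julian_day;
    };

    static int64_t toDateTime64Nanos(Int96Timestamp ts)
    {
        constexpr int64_t julian_day_of_unix_epoch = 2440588;  // 1970-01-01
        int64_t days_since_epoch = int64_t(ts.julian_day) - julian_day_of_unix_epoch;
        return days_since_epoch * 86400LL * 1000000000LL + int64_t(ts.nanoseconds_in_day);
    }

    int main()
    {
        Int96Timestamp ts{0, 2440588};  // midnight at the Unix epoch
        printf("%lld\n", (long long)toDateTime64Nanos(ts));  // prints 0
        return 0;
    }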
+
+CH_SCHEMA="\
+    plain_encoding_i32 Nullable(Int32), \
+    dict_encoding_i32 Nullable(Int32), \
+    plain_encoding_str Nullable(String), \
+    mix_encoding_str Nullable(String), \
+    dict_encoding_str LowCardinality(Nullable(String)), \
+    plain_encoding_dt64 Nullable(DateTime64(9)), \
+    dict_encoding_dt64 Nullable(DateTime64(9)), \
+    plain_encoding_decimal128 Nullable(Decimal(38, 3))"
+QUERY="SELECT * from file('$PAR_PATH', 'Parquet', '$CH_SCHEMA')"
+
+# there may be more than one row group in parquet files; unstable results may be generated by multiple threads
+$CLICKHOUSE_LOCAL --multiquery --max_threads 1 --input_format_parquet_use_native_reader true --query "$QUERY"

From e1fcdba4dd51a4b4af500c1a09663820004a4a76 Mon Sep 17 00:00:00 2001
From: copperybean 
Date: Sat, 24 Feb 2024 22:47:53 +0800
Subject: [PATCH 155/651] fix style

Change-Id: I8f7ebd173558b16d94d3161cb0b5300e7e78833d
---
 .../Formats/Impl/Parquet/ParquetDataBuffer.h  | 21 ++++++----
 .../Impl/Parquet/ParquetDataValuesReader.cpp  | 40 +++++++++++++------
 .../Impl/Parquet/ParquetDataValuesReader.h    |  6 ---
 .../Impl/Parquet/ParquetLeafColReader.cpp     | 18 ++++++---
 .../Impl/Parquet/ParquetRecordReader.cpp      |  3 +-
 5 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
index d4956f83092..f21216d5b5d 100644
--- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
+++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h
@@ -9,6 +9,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int PARQUET_EXCEPTION;
+}
+
 template  struct ToArrowDecimal;
 
 template <> struct ToArrowDecimal>>
@@ -27,8 +32,8 @@ class ParquetDataBuffer
 private:
 
 public:
-    ParquetDataBuffer(const uint8_t * data_, UInt64 avaible_, UInt8 datetime64_scale_ = DataTypeDateTime64::default_scale)
-        : data(reinterpret_cast(data_)), avaible(avaible_), datetime64_scale(datetime64_scale_) {}
+    ParquetDataBuffer(const uint8_t * data_, UInt64 available_, UInt8 datetime64_scale_ = DataTypeDateTime64::default_scale)
+        : data(reinterpret_cast(data_)), available(available_), datetime64_scale(datetime64_scale_) {}
 
     template 
     void ALWAYS_INLINE readValue(TValue & dst)
@@ -84,7 +89,7 @@ public:
         auto value_len = ::arrow::util::SafeLoadAs(getArrowData());
         if (unlikely(value_len < 0 || value_len > INT32_MAX - 4))
         {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid or corrupted value_len '{}'", value_len);
+            throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Invalid or corrupted value_len '{}'", value_len);
         }
         consume(4);
         checkAvaible(value_len);
@@ -110,7 +115,7 @@ public:
         auto status = TArrowDecimal::FromBigEndian(getArrowData(), elem_bytes_num);
         if (unlikely(!status.ok()))
         {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Read parquet decimal failed: {}", status.status().ToString());
+            throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Read parquet decimal failed: {}", status.status().ToString());
         }
status.ValueUnsafe().ToBytes(reinterpret_cast(out)); consume(elem_bytes_num); @@ -118,14 +123,14 @@ public: private: const Int8 * data; - UInt64 avaible; + UInt64 available; const UInt8 datetime64_scale; void ALWAYS_INLINE checkAvaible(UInt64 num) { - if (unlikely(avaible < num)) + if (unlikely(available < num)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Consuming {} bytes while {} avaible", num, avaible); + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Consuming {} bytes while {} available", num, available); } } @@ -134,7 +139,7 @@ private: void ALWAYS_INLINE consume(UInt64 num) { data += num; - avaible -= num; + available -= num; } }; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 3afc66dcb36..4ebe3d6a636 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -8,6 +8,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + void RleValuesReader::nextGroup() { // refer to: @@ -142,7 +148,7 @@ void RleValuesReader::visitNullableBySteps( individual_null_visitor(null_map_cursor); if (unlikely(valid_index_steps[step_idx] == UINT8_MAX)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported packed values number"); + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "unsupported packed values number"); } valid_index_steps[step_idx]++; } @@ -270,7 +276,7 @@ void ParquetPlainValuesReader::readBatch( auto idx = cursor; cursor += count; - // the type of offset_data is PaddedPODArray, which makes sure that the -1 index is avaible + // the type of offset_data is PaddedPODArray, which makes sure that the -1 index is available for (auto val_offset = offset_data[idx - 1]; idx < cursor; idx++) { offset_data[idx] = ++val_offset; @@ -394,14 +400,17 @@ void ParquetRleLCReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](size_t nest_cursor) { + /* individual_null_visitor */ [&](size_t nest_cursor) + { column_data[nest_cursor] = 0; has_null = true; }, - /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) { + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) + { rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); }, - /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) + { if (is_valid) { rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); @@ -435,7 +444,8 @@ void ParquetRleDictReader::readBatch( auto * offset_data = column.getOffsets().data(); auto & chars = column.getChars(); - auto append_nulls = [&](UInt8 num) { + auto append_nulls = [&](UInt8 num) + { for (auto limit = cursor + num; cursor < limit; cursor++) { chars.push_back(0); @@ -444,7 +454,8 @@ void ParquetRleDictReader::readBatch( } }; - auto append_string = [&](Int32 dict_idx) { + auto append_string = [&](Int32 dict_idx) + { auto dict_chars_cursor = dict_offsets[dict_idx - 1]; auto value_len = dict_offsets[dict_idx] - dict_chars_cursor; auto chars_cursor = chars.size(); @@ -462,7 +473,8 @@ void ParquetRleDictReader::readBatch( num_values, max_def_level, /* individual_null_visitor */ [&](size_t) {}, - /* stepped_valid_visitor */ [&](size_t, const std::vector & valid_index_steps) { + /* 
stepped_valid_visitor */ [&](size_t, const std::vector & valid_index_steps) + { value_cache.resize(valid_index_steps.size()); rle_data_reader->setValues( value_cache.data() + 1, static_cast(valid_index_steps.size() - 1), val_getter); @@ -474,7 +486,8 @@ void ParquetRleDictReader::readBatch( append_nulls(valid_index_steps[i] - 1); } }, - /* repeated_visitor */ [&](bool is_valid, size_t, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t, UInt32 count) + { if (is_valid) { value_cache.resize(count); @@ -505,13 +518,16 @@ void ParquetRleDictReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](size_t nest_cursor) { + /* individual_null_visitor */ [&](size_t nest_cursor) + { null_map.setNull(nest_cursor); }, - /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) { + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) + { rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); }, - /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) + { if (is_valid) { rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 66a1f4877e4..8bc381aa8d2 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -15,12 +15,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int PARQUET_EXCEPTION; -} - class RleValuesReader { public: diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp index e2677d7cae3..17feea80b9f 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -216,7 +216,8 @@ template ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt64 rows_num, const String & name) { reading_rows_num = rows_num; - auto readPageIfEmpty = [&]() { + auto readPageIfEmpty = [&]() + { while (!cur_page_values) readPage(); }; @@ -245,7 +246,8 @@ void ParquetLeafColReader::resetColumn(UInt64 rows_num) if (reading_low_cardinality) { assert(dictionary); - visitColStrIndexType(dictionary->size(), [&](TColVec *) { + visitColStrIndexType(dictionary->size(), [&](TColVec *) + { column = TColVec::create(); }); @@ -289,7 +291,8 @@ void ParquetLeafColReader::degradeDictionary() ColumnString & col_dest = *static_cast(column.get()); const ColumnString & col_dict_str = *static_cast(dictionary.get()); - visitColStrIndexType(dictionary->size(), [&](TColVec *) { + visitColStrIndexType(dictionary->size(), [&](TColVec *) + { const TColVec & col_src = *static_cast(col_existing.get()); reserveColumnStrRows(column, reading_rows_num); @@ -411,7 +414,8 @@ void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) assert(col_descriptor.max_definition_level() >= 0); std::unique_ptr def_level_reader; - if (col_descriptor.max_definition_level() > 0) { + if (col_descriptor.max_definition_level() > 0) + { auto bit_width = arrow::bit_util::Log2(col_descriptor.max_definition_level() + 1); auto num_bytes = ::arrow::util::SafeLoadAs(buffer); auto bit_reader = std::make_unique(buffer + 4, num_bytes); @@ -435,7 +439,8 @@ void 
ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) degradeDictionary(); } - ParquetDataBuffer parquet_buffer = [&]() { + ParquetDataBuffer parquet_buffer = [&]() + { if constexpr (!std::is_same_v, TColumn>) return ParquetDataBuffer(buffer, max_size); @@ -485,7 +490,8 @@ std::unique_ptr ParquetLeafColReader::createDi if (reading_low_cardinality && std::same_as) { std::unique_ptr res; - visitColStrIndexType(dictionary->size(), [&](TCol *) { + visitColStrIndexType(dictionary->size(), [&](TCol *) + { res = std::make_unique>( col_descriptor.max_definition_level(), std::move(def_level_reader), diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 42f131ff794..69e694a340f 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -27,7 +27,6 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int PARQUET_EXCEPTION; } @@ -142,7 +141,7 @@ std::unique_ptr createColReader( } } -} // anonymouse namespace +} // anonymous namespace ParquetRecordReader::ParquetRecordReader( Block header_, From 471dff6589abff5d05ab8a9bb267e198f377c536 Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 25 Feb 2024 14:26:53 +0800 Subject: [PATCH 156/651] fix test Change-Id: Ia7dbf1d762f7f054a9aa677caaaff6bfe1a42c38 --- src/Core/SettingsChangesHistory.h | 1 + .../Formats/Impl/Parquet/ParquetDataBuffer.h | 13 +++++-------- .../Impl/Parquet/ParquetDataValuesReader.cpp | 2 +- .../Formats/Impl/Parquet/ParquetDataValuesReader.h | 4 ++-- .../Formats/Impl/Parquet/ParquetLeafColReader.cpp | 6 +++--- .../Formats/Impl/Parquet/ParquetRecordReader.cpp | 7 ++----- .../Formats/Impl/ParquetBlockInputFormat.cpp | 8 ++++++++ .../0_stateless/02998_native_parquet_reader.sh | 5 +++-- 8 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index ece48620618..6fb8fb9358c 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -176,6 +176,7 @@ static std::map sett {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, }}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h index f21216d5b5d..5c37375fa0c 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -38,15 +38,13 @@ public: template void ALWAYS_INLINE readValue(TValue & dst) { - checkAvaible(sizeof(TValue)); - dst = *(reinterpret_cast(data)); - consume(sizeof(TValue)); + readBytes(&dst, sizeof(TValue)); } void ALWAYS_INLINE readBytes(void * dst, size_t bytes) { checkAvaible(bytes); - memcpy(dst, data, bytes); + std::copy(data, data + 
bytes, reinterpret_cast(dst)); consume(bytes); } @@ -68,13 +66,12 @@ public: 100000000 * spd, 1000000000 * spd}; - checkAvaible(sizeof(parquet::Int96)); - auto decoded = parquet::DecodeInt96Timestamp(*reinterpret_cast(data)); + parquet::Int96 tmp; + readValue(tmp); + auto decoded = parquet::DecodeInt96Timestamp(tmp); uint64_t scaled_nano = decoded.nanoseconds / pow10[datetime64_scale]; dst = static_cast(decoded.days_since_epoch * scaled_day[datetime64_scale] + scaled_nano); - - consume(sizeof(parquet::Int96)); } /** diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 4ebe3d6a636..6743086e9e6 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -306,7 +306,7 @@ void ParquetPlainValuesReader>::readBatch( }, /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) { - auto col_data_pos = column_data + nest_cursor; + auto * col_data_pos = column_data + nest_cursor; for (UInt32 i = 0; i < count; i++) { plain_data_buffer.readDateTime64(col_data_pos[i]); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 8bc381aa8d2..688de4f52eb 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -97,7 +97,7 @@ public: * @tparam ValueGetter A callback with signature: TValue(Int32 val) */ template - void setValues(TValue * column_data, UInt32 num_values, ValueGetter && val_getter); + void setValues(TValue * res_values, UInt32 num_values, ValueGetter && val_getter); /** * @brief Set the value by valid_index_steps generated in visitNullableBySteps. 
@@ -106,7 +106,7 @@ public: */ template void setValueBySteps( - TValue * column_data, + TValue * res_values, const std::vector & col_data_steps, ValueGetter && val_getter); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp index 17feea80b9f..52dfad7606a 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -113,7 +113,7 @@ ColumnPtr readDictPage>( const parquet::ColumnDescriptor & /* col_des */, const DataTypePtr & data_type) { - auto & datetime_type = assert_cast(*data_type); + const auto & datetime_type = assert_cast(*data_type); auto dict_col = ColumnDecimal::create(page.num_values(), datetime_type.getScale()); auto * col_data = dict_col->getData().data(); ParquetDataBuffer buffer(page.data(), page.size(), datetime_type.getScale()); @@ -282,7 +282,7 @@ void ParquetLeafColReader::degradeDictionary() dictionary = nullptr; return; } - assert(dictionary && column->size()); + assert(dictionary && !column->empty()); null_map = std::make_unique(reading_rows_num); auto col_existing = std::move(column); @@ -372,7 +372,7 @@ void ParquetLeafColReader::readPage() dict_page.encoding() != parquet::Encoding::PLAIN_DICTIONARY && dict_page.encoding() != parquet::Encoding::PLAIN)) { - throw new Exception( + throw Exception( ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary page encoding {}", dict_page.encoding()); } LOG_DEBUG(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 69e694a340f..9cde433b983 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -156,9 +156,6 @@ ParquetRecordReader::ParquetRecordReader( , row_groups_indices(std::move(row_groups_indices_)) , left_rows(getTotalRows(*file_reader->metadata())) { - // Only little endian system is supported currently - static_assert(std::endian::native == std::endian::little); - log = &Poco::Logger::get("ParquetRecordReader"); parquet_col_indice.reserve(header.columns()); @@ -230,9 +227,9 @@ void ParquetRecordReader::loadNextRowGroup() Int64 ParquetRecordReader::getTotalRows(const parquet::FileMetaData & meta_data) { Int64 res = 0; - for (size_t i = 0; i < row_groups_indices.size(); i++) + for (auto idx : row_groups_indices) { - res += meta_data.RowGroup(row_groups_indices[i])->num_rows(); + res += meta_data.RowGroup(idx)->num_rows(); } return res; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index e35d53dc4f4..2e849f09fda 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -484,6 +484,14 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat if (format_settings.parquet.use_native_reader) { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + if constexpr (std::endian::native != std::endian::little) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "parquet native reader only supports little endian system currently"); +#pragma clang diagnostic pop + row_group_batch.native_record_reader = std::make_shared( getPort().getHeader(), std::move(properties), diff --git 
a/tests/queries/0_stateless/02998_native_parquet_reader.sh b/tests/queries/0_stateless/02998_native_parquet_reader.sh
index 5c129e6c5ce..4e5169c4bf0 100755
--- a/tests/queries/0_stateless/02998_native_parquet_reader.sh
+++ b/tests/queries/0_stateless/02998_native_parquet_reader.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# Tags: no-fasttest
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
@@ -201,8 +202,8 @@ CH_SCHEMA="\
     plain_encoding_str Nullable(String), \
     mix_encoding_str Nullable(String), \
     dict_encoding_str LowCardinality(Nullable(String)), \
-    plain_encoding_dt64 Nullable(DateTime64(9)), \
-    dict_encoding_dt64 Nullable(DateTime64(9)), \
+    plain_encoding_dt64 Nullable(DateTime64(9, \\'UTC\\')), \
+    dict_encoding_dt64 Nullable(DateTime64(9, \\'UTC\\')), \
     plain_encoding_decimal128 Nullable(Decimal(38, 3))"
 QUERY="SELECT * from file('$PAR_PATH', 'Parquet', '$CH_SCHEMA')"

From f68b788f5900b66ab4623874c98ed1b4025b5fd0 Mon Sep 17 00:00:00 2001
From: Danila Puzov 
Date: Sat, 11 May 2024 15:34:13 +0300
Subject: [PATCH 157/651] Tests and docs for serial, some fixes for
 generateSnowflakeID

---
 src/Functions/generateSnowflakeID.cpp         |  62 +++-
 src/Functions/generateUUIDv7.cpp              | 284 ++++++++++++++----
 src/Functions/serial.cpp                      | 134 ++++-----
 .../03129_serial_test_zookeeper.reference     |   8 +
 .../03129_serial_test_zookeeper.sql           |  20 ++
 5 files changed, 373 insertions(+), 135 deletions(-)
 create mode 100644 tests/queries/0_stateless/03129_serial_test_zookeeper.reference
 create mode 100644 tests/queries/0_stateless/03129_serial_test_zookeeper.sql

diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp
index e54b720ec98..dd837a58325 100644
--- a/src/Functions/generateSnowflakeID.cpp
+++ b/src/Functions/generateSnowflakeID.cpp
@@ -11,11 +11,42 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }
 
+namespace
+{
+
+/*
+  Snowflake ID
+  https://en.wikipedia.org/wiki/Snowflake_ID
+
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+|0|                         timestamp                           |
+├─┼                 ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+|                   |     machine_id    |    machine_seq_num    |
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+
+- The first 41 (+ 1 top zero bit) bits are the timestamp in Unix time milliseconds.
+- The middle 10 bits are the machine ID.
+- The last 12 bits encode the number of IDs processed by the machine within the given millisecond.
+*/
+
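To make the layout above concrete, here is a small illustrative sketch that peels a Snowflake ID apart with the shifts and masks implied by the field widths; the ID value itself is a made-up example, not output from this function:

    #include <cstdint>
    #include <cstdio>

    // Decompose a Snowflake ID per the 41/10/12-bit layout described above.
    int main()
    {
        int64_t id = 7171261875396608LL;  // hypothetical generated ID
        int64_t timestamp_ms = (id >> 22) & ((1LL << 41) - 1);
        int64_t machine_id   = (id >> 12) & ((1LL << 10) - 1);
        int64_t seq_num      =  id        & ((1LL << 12) - 1);
        printf("timestamp_ms=%lld machine_id=%lld seq=%lld\n",
               (long long)timestamp_ms, (long long)machine_id, (long long)seq_num);
        return 0;
    }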
+constexpr auto timestamp_size = 41;
+constexpr auto machine_id_size = 10;
+constexpr auto machine_seq_num_size = 12;
+
+constexpr int64_t timestamp_mask = ((1LL << timestamp_size) - 1) << (machine_id_size + machine_seq_num_size);
+constexpr int64_t machine_id_mask = ((1LL << machine_id_size) - 1) << machine_seq_num_size;
+constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_size) - 1;
+
+}
+
 class FunctionSnowflakeID : public IFunction
 {
 private:
-    mutable std::atomic machine_sequence_number{0};
-    mutable std::atomic last_timestamp{0};
+    mutable std::atomic state{0};
+    // the previously generated Snowflake ID
+    // state is kept in a single atomic value because we don't want to use a mutex
 
 public:
     static constexpr auto name = "generateSnowflakeID";
@@ -60,23 +91,28 @@ public:
     // derive the machine ID by hashing the server UUID
     Int64 h = UUIDHelpers::getHighBytes(serverUUID);
     Int64 l = UUIDHelpers::getLowBytes(serverUUID);
-    Int64 machine_id = (h * 11) ^ (l * 17);
+    Int64 machine_id = ((h * 11) ^ (l * 17)) & machine_id_mask;
 
-    for (Int64 & x : vec_to) {
+    for (Int64 & el : vec_to) {
         const auto tm_point = std::chrono::system_clock::now();
         Int64 current_timestamp = std::chrono::duration_cast(
-            tm_point.time_since_epoch()).count();
+            tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1);
 
-        Int64 local_machine_sequence_number = 0;
+        Int64 last_state, new_state;
+        do {
+            last_state = state.load();
+            Int64 last_timestamp = (last_state & timestamp_mask) >> (machine_id_size + machine_seq_num_size);
+            Int64 machine_seq_num = last_state & machine_seq_num_mask;
 
-        if (current_timestamp != last_timestamp.load()) {
-            machine_sequence_number.store(0);
-            last_timestamp.store(current_timestamp);
-        } else {
-            local_machine_sequence_number = machine_sequence_number.fetch_add(1) + 1;
-        }
+            if (current_timestamp == last_timestamp) {
+                ++machine_seq_num;
+            }
+            new_state = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | machine_seq_num;
+        } while (!state.compare_exchange_strong(last_state, new_state));
+        // failed CAS => another thread updated the state
+        // successful CAS => we have a unique (timestamp, machine_seq_num) pair on this machine
 
-        x = (current_timestamp << 22) | (machine_id & 0x3ff000ull) | (local_machine_sequence_number & 0xfffull);
+        el = new_state;
     }
 
     return col_res;
diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp
index 61d742d2fda..411a3a076ac 100644
--- a/src/Functions/generateUUIDv7.cpp
+++ b/src/Functions/generateUUIDv7.cpp
@@ -1,13 +1,178 @@
-#include 
-#include 
 #include 
+#include 
+#include 
+#include 
 
 namespace DB
 {
 
-namespace ErrorCodes
+namespace
 {
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+
+/* Bit layouts of UUIDv7
+
+without counter:
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+| unix_ts_ms |
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+| unix_ts_ms | ver | rand_a |
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+|var| rand_b |
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+| rand_b |
+└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
+
+with counter:
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+| unix_ts_ms |
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+| unix_ts_ms | ver | counter_high_bits |
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|var| counter_low_bits | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| rand_b | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ +*/ + +/// bit counts +constexpr auto rand_a_bits_count = 12; +constexpr auto rand_b_bits_count = 62; +constexpr auto rand_b_low_bits_count = 32; +constexpr auto counter_high_bits_count = rand_a_bits_count; +constexpr auto counter_low_bits_count = 30; +constexpr auto bits_in_counter = counter_high_bits_count + counter_low_bits_count; +constexpr uint64_t counter_limit = (1ull << bits_in_counter); + +/// bit masks for UUIDv7 components +constexpr uint64_t variant_2_mask = (2ull << rand_b_bits_count); +constexpr uint64_t rand_a_bits_mask = (1ull << rand_a_bits_count) - 1; +constexpr uint64_t rand_b_bits_mask = (1ull << rand_b_bits_count) - 1; +constexpr uint64_t rand_b_with_counter_bits_mask = (1ull << rand_b_low_bits_count) - 1; +constexpr uint64_t counter_low_bits_mask = (1ull << counter_low_bits_count) - 1; +constexpr uint64_t counter_high_bits_mask = rand_a_bits_mask; + +uint64_t getTimestampMillisecond() +{ + timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); + const uint64_t sec = tp.tv_sec; + return sec * 1000 + tp.tv_nsec / 1000000; +} + +void setTimestampAndVersion(UUID & uuid, uint64_t timestamp) +{ + UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & rand_a_bits_mask) | (timestamp << 16) | 0x7000; +} + +void setVariant(UUID & uuid) +{ + UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & rand_b_bits_mask) | variant_2_mask; +} + +struct FillAllRandomPolicy +{ + static constexpr auto name = "generateUUIDv7NonMonotonic"; + static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. 
This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; + struct Data + { + void generate(UUID & uuid, uint64_t ts) + { + setTimestampAndVersion(uuid, ts); + setVariant(uuid); + } + }; +}; + +struct CounterFields +{ + uint64_t last_timestamp = 0; + uint64_t counter = 0; + + void resetCounter(const UUID & uuid) + { + const uint64_t counter_low_bits = (UUIDHelpers::getLowBytes(uuid) >> rand_b_low_bits_count) & counter_low_bits_mask; + const uint64_t counter_high_bits = UUIDHelpers::getHighBytes(uuid) & counter_high_bits_mask; + counter = (counter_high_bits << 30) | counter_low_bits; + } + + void incrementCounter(UUID & uuid) + { + if (++counter == counter_limit) [[unlikely]] + { + ++last_timestamp; + resetCounter(uuid); + setTimestampAndVersion(uuid, last_timestamp); + setVariant(uuid); + } + else + { + UUIDHelpers::getHighBytes(uuid) = (last_timestamp << 16) | 0x7000 | (counter >> counter_low_bits_count); + UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & rand_b_with_counter_bits_mask) | variant_2_mask | ((counter & counter_low_bits_mask) << rand_b_low_bits_count); + } + } + + void generate(UUID & uuid, uint64_t timestamp) + { + const bool need_to_increment_counter = (last_timestamp == timestamp) || ((last_timestamp > timestamp) & (last_timestamp < timestamp + 10000)); + if (need_to_increment_counter) + { + incrementCounter(uuid); + } + else + { + last_timestamp = timestamp; + resetCounter(uuid); + setTimestampAndVersion(uuid, last_timestamp); + setVariant(uuid); + } + } +}; + + +struct GlobalCounterPolicy +{ + static constexpr auto name = "generateUUIDv7"; + static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + + /// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously. + struct Data + { + static inline CounterFields fields; + static inline SharedMutex mutex; /// works a little bit faster than std::mutex here + std::lock_guard guard; + + Data() + : guard(mutex) + {} + + void generate(UUID & uuid, uint64_t timestamp) + { + fields.generate(uuid, timestamp); + } + }; +}; + +struct ThreadLocalCounterPolicy +{ + static constexpr auto name = "generateUUIDv7ThreadMonotonic"; + static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. 
This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; + + /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. + struct Data + { + static inline thread_local CounterFields fields; + + void generate(UUID & uuid, uint64_t timestamp) + { + fields.generate(uuid, timestamp); + } + }; +}; + } #define DECLARE_SEVERAL_IMPLEMENTATIONS(...) \ @@ -16,77 +181,72 @@ DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__) DECLARE_SEVERAL_IMPLEMENTATIONS( -class FunctionGenerateUUIDv7 : public IFunction +template +class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy { public: - static constexpr auto name = "generateUUIDv7"; + String getName() const final { return FillPolicy::name; } - String getName() const override + size_t getNumberOfArguments() const final { return 0; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const final { return false; } + bool useDefaultImplementationForNulls() const final { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const final { return false; } + bool isVariadic() const final { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - return name; - } - - size_t getNumberOfArguments() const override { return 0; } - - bool isDeterministicInScopeOfQuery() const override { return false; } - bool useDefaultImplementationForNulls() const override { return false; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - bool isVariadic() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() > 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", - getName(), arguments.size()); + FunctionArgumentDescriptors mandatory_args; + FunctionArgumentDescriptors optional_args{ + {"expr", nullptr, nullptr, "Arbitrary Expression"} + }; + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } - bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - size_t size = input_rows_count; - vec_to.resize(size); - - /// RandImpl is target-dependent and is not the same in different TargetSpecific namespaces. 
- RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UUID)); - - for (UUID & uuid : vec_to) + if (input_rows_count) { - /// https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#section-5.2 + vec_to.resize(input_rows_count); - const auto tm_point = std::chrono::system_clock::now(); - UInt64 unix_ts_ms = std::chrono::duration_cast( - tm_point.time_since_epoch()).count(); + /// Not all random bytes produced here are required for the UUIDv7 but it's the simplest way to get the required number of them by using RandImpl + RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UUID)); - UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & 0x0000000000000fffull) | 0x0000000000007000ull | (unix_ts_ms << 16); - UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & 0x3fffffffffffffffull) | 0x8000000000000000ull; + /// Note: For performance reasons, clock_gettime is called once per chunk instead of once per UUID. This reduces precision but + /// it still complies with the UUID standard. + uint64_t timestamp = getTimestampMillisecond(); + for (UUID & uuid : vec_to) + { + typename FillPolicy::Data data; + data.generate(uuid, timestamp); + } } - return col_res; } }; - ) // DECLARE_SEVERAL_IMPLEMENTATIONS #undef DECLARE_SEVERAL_IMPLEMENTATIONS -class FunctionGenerateUUIDv7 : public TargetSpecific::Default::FunctionGenerateUUIDv7 +template +class FunctionGenerateUUIDv7Base : public TargetSpecific::Default::FunctionGenerateUUIDv7Base { public: - explicit FunctionGenerateUUIDv7(ContextPtr context) : selector(context) - { - selector.registerImplementation(); + using Self = FunctionGenerateUUIDv7Base; + using Parent = TargetSpecific::Default::FunctionGenerateUUIDv7Base; - #if USE_MULTITARGET_CODE - selector.registerImplementation(); - #endif + explicit FunctionGenerateUUIDv7Base(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + +#if USE_MULTITARGET_CODE + using ParentAVX2 = TargetSpecific::AVX2::FunctionGenerateUUIDv7Base; + selector.registerImplementation(); +#endif } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -96,18 +256,34 @@ public: static FunctionPtr create(ContextPtr context) { - return std::make_shared(context); + return std::make_shared(context); } private: ImplementationSelector selector; }; +template +void registerUUIDv7Generator(auto& factory) +{ + static constexpr auto doc_syntax_format = "{}([expression])"; + static constexpr auto example_format = "SELECT {}()"; + static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; + + FunctionDocumentation::Description doc_description = FillPolicy::doc_description; + FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7."; + FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories doc_categories = {"UUID"}; + + factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); +} + REGISTER_FUNCTION(GenerateUUIDv7) { - factory.registerFunction(); + registerUUIDv7Generator(factory); + registerUUIDv7Generator(factory); + registerUUIDv7Generator(factory); } - } - - diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp index 4f336013ca8..1745e17b5e7 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/serial.cpp @@ -7,6 +7,9 @@ #include #include #include "Common/Logger.h" +#include "Common/ZooKeeper/IKeeper.h" +#include "Common/ZooKeeper/KeeperException.h" +#include "Common/ZooKeeper/Types.h" #include namespace DB { @@ -15,6 +18,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int KEEPER_EXCEPTION; } class FunctionSerial : public IFunction @@ -69,6 +73,15 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (zk == nullptr) { + throw Exception(ErrorCodes::KEEPER_EXCEPTION, + "ZooKeeper is not configured for function {}", + getName()); + } + if (zk->expired()) { + zk = context->getZooKeeper(); + } + auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); size_t size = input_rows_count; @@ -77,78 +90,32 @@ public: const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); - // if serial name used first time - zk->createAncestors(serial_path); - zk->createIfNotExists(serial_path, ""); + // CAS in ZooKeeper + // `get` value and version, `trySet` new with version check + // I didn't get how to do it with `multi` Int64 counter; + std::string counter_path = serial_path + "/counter"; - if (zk != nullptr) { - // Get Lock in ZooKeeper - // https://zookeeper.apache.org/doc/r3.2.2/recipes.html + // if serial name used first time + zk->createAncestors(counter_path); + zk->createIfNotExists(counter_path, "1"); - // 1. - if (zk->expired()) { - zk = context->getZooKeeper(); + Coordination::Stat stat; + while (true) { + std::string counter_string = zk->get(counter_path, &stat); + counter = std::stoll(counter_string); + std::string updated_counter = std::to_string(counter + input_rows_count); + Coordination::Error err = zk->trySet(counter_path, updated_counter); + if (err == Coordination::Error::ZOK) { + // CAS is done + break; } - - std::string lock_path = serial_path + "/lock-"; - std::string path_created = zk->create(lock_path, "", zkutil::CreateMode::EphemeralSequential); - Int64 created_sequence_number = std::stoll(path_created.substr(lock_path.size(), path_created.size() - lock_path.size())); - - while (true) { - // 2. - zkutil::Strings children = zk->getChildren(serial_path); - - // 3. 
- Int64 lowest_child_sequence_number = -1; - for (auto& child : children) { - if (child == "counter") { - continue; - } - std::string child_suffix = child.substr(5, 10); - Int64 seq_number = std::stoll(child_suffix); - - if (lowest_child_sequence_number == -1 || seq_number < lowest_child_sequence_number) { - lowest_child_sequence_number = seq_number; - } - } - - if (lowest_child_sequence_number == created_sequence_number) { - break; - // we have a lock in ZooKeeper, now can get the counter value - } - - // 4. and 5. - Int64 prev_seq_number = created_sequence_number - 1; - std::string to_wait_key = std::to_string(prev_seq_number); - while (to_wait_key.size() != 10) { - to_wait_key = "0" + to_wait_key; - } - - zk->waitForDisappear(lock_path + to_wait_key); + if (err != Coordination::Error::ZBADVERSION) { + throw Exception(ErrorCodes::KEEPER_EXCEPTION, + "ZooKeeper trySet operation failed with unexpected error = {} in function {}", + err, getName()); } - - // Now we have a lock - // Update counter in ZooKeeper - std::string counter_path = serial_path + "/counter"; - if (zk->exists(counter_path)) { - std::string counter_string = zk->get(counter_path, nullptr); - counter = std::stoll(counter_string); - - LOG_INFO(getLogger("Serial Function"), "Got counter from Zookeeper = {}", counter); - } else { - counter = 1; - } - zk->createOrUpdate(counter_path, std::to_string(counter + input_rows_count), zkutil::CreateMode::Persistent); - - // Unlock = delete node created on step 1. - zk->deleteEphemeralNodeIfContentMatches(path_created, ""); - } else { - // ZooKeeper is not available - // What to do? - - counter = 1; } // Make a result @@ -157,7 +124,6 @@ public: ++counter; } - return col_res; } @@ -165,7 +131,39 @@ public: REGISTER_FUNCTION(Serial) { - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation + { + .description=R"( +Generates and returns sequential numbers starting from the previous counter value. +This function takes a constant string argument - a series identifier. +The server should be configured with a ZooKeeper. 
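+The counter for an identifier is kept in ZooKeeper under /serials/<identifier>/counter.
+Each call reserves one value per input row in a single compare-and-set retry loop,
+so concurrent callers on different servers do not receive overlapping numbers.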
+)", + .syntax = "serial(identifier)", + .arguments{ + {"series identifier", "Series identifier (String)"} + }, + .returned_value = "Sequential numbers of type Int64 starting from the previous counter value", + .examples{ + {"first call", "SELECT serial('name')", R"( +┌─serial('name')─┐ +│ 1 │ +└────────────────┘)"}, + {"second call", "SELECT serial('name')", R"( +┌─serial('name')─┐ +│ 2 │ +└────────────────┘)"}, + {"column call", "SELECT *, serial('name') FROM test_table", R"( +┌─CounterID─┬─UserID─┬─ver─┬─serial('name')─┐ +│ 1 │ 3 │ 3 │ 3 │ +│ 1 │ 1 │ 1 │ 4 │ +│ 1 │ 2 │ 2 │ 5 │ +│ 1 │ 5 │ 5 │ 6 │ +│ 1 │ 4 │ 4 │ 7 │ +└───────────┴────────┴─────┴────────────────┘ + )"}}, + .categories{"Unique identifiers"} + }); + } } diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference new file mode 100644 index 00000000000..60714f4064f --- /dev/null +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference @@ -0,0 +1,8 @@ +1 +2 +1 3 3 3 +1 1 1 4 +1 2 2 5 +1 5 5 6 +1 4 4 7 +1 diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql new file mode 100644 index 00000000000..3eacd1ae908 --- /dev/null +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql @@ -0,0 +1,20 @@ +SELECT serial('x'); +SELECT serial('x'); + +DROP TABLE IF EXISTS default.test_table; + +CREATE TABLE test_table +( + CounterID UInt32, + UserID UInt32, + ver UInt16 +) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/1-1/test_table', 'x', ver) +PARTITION BY CounterID +ORDER BY (CounterID, intHash32(UserID)) +SAMPLE BY intHash32(UserID); + +INSERT INTO test_table VALUES (1, 1, 1), (1, 2, 2), (1, 3, 3), (1, 4, 4), (1, 5, 5); + +SELECT *, serial('x') FROM test_table; + +SELECT serial('y'); \ No newline at end of file From 7dca8c0f75c104715e2c626fd16f1f57b71e1321 Mon Sep 17 00:00:00 2001 From: woodlzm Date: Sat, 11 May 2024 12:28:12 -0700 Subject: [PATCH 158/651] Fix styles for test.py. --- .../test_trace_log_build_id/test.py | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_trace_log_build_id/test.py b/tests/integration/test_trace_log_build_id/test.py index b4a49b2e4d3..26ef3684f86 100644 --- a/tests/integration/test_trace_log_build_id/test.py +++ b/tests/integration/test_trace_log_build_id/test.py @@ -2,8 +2,8 @@ import pytest from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION TEST_QUERY_ID = "test_trace_log_build_id_query_{}" -OLD_TEST_QUERY_ID = TEST_QUERY_ID.format('0') -NEW_TEST_QUERY_ID = TEST_QUERY_ID.format('1') +OLD_TEST_QUERY_ID = TEST_QUERY_ID.format("0") +NEW_TEST_QUERY_ID = TEST_QUERY_ID.format("1") ACTIVE_TRACE_LOG_TABLE = "trace_log" RENAMED_TRACE_LOG_TABLE = "trace_log_0" @@ -37,7 +37,6 @@ def test_trace_log_build_id(started_cluster): # We make queries to create entries in trace_log, then restart with new version and verify if the old # trace_log table is renamed and a new trace_log table is created. 
- query_for_table_name = "EXISTS TABLE system.{table}" node.query( @@ -45,8 +44,12 @@ def test_trace_log_build_id(started_cluster): query_id=OLD_TEST_QUERY_ID, ) node.query("SYSTEM FLUSH LOGS") - assert node.query(query_for_table_name.format(table=ACTIVE_TRACE_LOG_TABLE)) == "1\n" - assert node.query(query_for_table_name.format(table=RENAMED_TRACE_LOG_TABLE)) == "0\n" + assert ( + node.query(query_for_table_name.format(table=ACTIVE_TRACE_LOG_TABLE)) == "1\n" + ) + assert ( + node.query(query_for_table_name.format(table=RENAMED_TRACE_LOG_TABLE)) == "0\n" + ) node.restart_with_latest_version() @@ -63,7 +66,28 @@ def test_trace_log_build_id(started_cluster): query_id=NEW_TEST_QUERY_ID, ) node.query("SYSTEM FLUSH LOGS") - assert node.query(query_for_test_query_id.format(table=ACTIVE_TRACE_LOG_TABLE, query_id=OLD_TEST_QUERY_ID)) == "0\n" - assert node.query(query_for_test_query_id.format(table=ACTIVE_TRACE_LOG_TABLE, query_id=NEW_TEST_QUERY_ID)) == "1\n" - assert node.query(query_for_test_query_id.format(table=RENAMED_TRACE_LOG_TABLE, query_id=OLD_TEST_QUERY_ID)) == "1\n" + assert ( + node.query( + query_for_test_query_id.format( + table=ACTIVE_TRACE_LOG_TABLE, query_id=OLD_TEST_QUERY_ID + ) + ) + == "0\n" + ) + assert ( + node.query( + query_for_test_query_id.format( + table=ACTIVE_TRACE_LOG_TABLE, query_id=NEW_TEST_QUERY_ID + ) + ) + == "1\n" + ) + assert ( + node.query( + query_for_test_query_id.format( + table=RENAMED_TRACE_LOG_TABLE, query_id=OLD_TEST_QUERY_ID + ) + ) + == "1\n" + ) From 06409bf53ddc9fade924532cc6bb58f150ca8934 Mon Sep 17 00:00:00 2001 From: woodlzm Date: Sat, 11 May 2024 12:28:12 -0700 Subject: [PATCH 159/651] Fix styles for test.py. --- tests/integration/test_trace_log_build_id/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_trace_log_build_id/test.py b/tests/integration/test_trace_log_build_id/test.py index 26ef3684f86..84392ab12b1 100644 --- a/tests/integration/test_trace_log_build_id/test.py +++ b/tests/integration/test_trace_log_build_id/test.py @@ -90,4 +90,3 @@ def test_trace_log_build_id(started_cluster): ) == "1\n" ) - From 0ce90f4ef44a8c3f51e3bb57e810c5bef6e1f492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9A=D0=B8=D1=80=D0=B8=D0=BB=D0=BB=20=D0=93=D0=B0=D1=80?= =?UTF-8?q?=D0=B1=D0=B0=D1=80?= Date: Sun, 12 May 2024 18:13:17 +0300 Subject: [PATCH 160/651] Prevent conversion to Replicated if zookeeper path already exists --- src/Databases/DatabaseOrdinary.cpp | 14 ++++++ .../configs/config.d/clusters.xml | 4 +- .../test_modify_engine_on_restart/test.py | 45 +++++++++++++++++-- 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index d571fe4491d..22b553ecd1e 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -76,6 +76,20 @@ static void setReplicatedEngine(ASTCreateQuery * create_query, ContextPtr contex String replica_path = server_settings.default_replica_path; String replica_name = server_settings.default_replica_name; + /// Check that replica path doesn't exist + Macros::MacroExpansionInfo info; + StorageID table_id = StorageID(create_query->getDatabase(), create_query->getTable(), create_query->uuid); + info.table_id = table_id; + info.expand_special_macros_only = false; + + String zookeeper_path = context->getMacros()->expand(replica_path, info); + if (context->getZooKeeper()->exists(zookeeper_path)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Found existing ZooKeeper path {} while trying to convert 
table {} to replicated. Table will not be converted.", + zookeeper_path, backQuote(table_id.getFullTableName()) + ); + auto args = std::make_shared(); args->children.push_back(std::make_shared(replica_path)); args->children.push_back(std::make_shared(replica_name)); diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml index d3a9d4fb8f0..fbcf6499ec1 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml @@ -15,8 +15,10 @@ +/clickhouse/tables/{database}/{table}/{uuid} + 01 - \ No newline at end of file + diff --git a/tests/integration/test_modify_engine_on_restart/test.py b/tests/integration/test_modify_engine_on_restart/test.py index 289b25dd89e..8af7ec8ae83 100644 --- a/tests/integration/test_modify_engine_on_restart/test.py +++ b/tests/integration/test_modify_engine_on_restart/test.py @@ -1,5 +1,9 @@ import pytest -from test_modify_engine_on_restart.common import check_flags_deleted, set_convert_flags +from test_modify_engine_on_restart.common import ( + check_flags_deleted, + set_convert_flags, + get_table_path, +) from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -120,7 +124,7 @@ def check_replica_added(): q( ch2, - f"CREATE TABLE rmt ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{uuid}/{{shard}}', '{{replica}}') PARTITION BY toYYYYMM(D) ORDER BY A", + f"CREATE TABLE rmt ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database_name}/rmt/{uuid}', '{{replica}}') PARTITION BY toYYYYMM(D) ORDER BY A", ) ch2.query(database=database_name, sql="SYSTEM SYNC REPLICA rmt", timeout=20) @@ -136,7 +140,7 @@ def check_replica_added(): def test_modify_engine_on_restart(started_cluster): - ch1.query("CREATE DATABASE " + database_name + " ON CLUSTER cluster") + ch1.query("CREATE DATABASE IF NOT EXISTS " + database_name + " ON CLUSTER cluster") create_tables() @@ -159,3 +163,38 @@ def test_modify_engine_on_restart(started_cluster): ch1.restart_clickhouse() check_tables(True) + + +def test_modify_engine_fails_if_zk_path_exists(started_cluster): + ch1.query("CREATE DATABASE IF NOT EXISTS " + database_name + " ON CLUSTER cluster") + + q( + ch1, + "CREATE TABLE already_exists_1 ( A Int64, D Date, S String ) ENGINE MergeTree() PARTITION BY toYYYYMM(D) ORDER BY A;", + ) + uuid = q( + ch1, + f"SELECT uuid FROM system.tables WHERE table = 'already_exists_1' and database = '{database_name}'", + ).strip("'[]\n") + + q( + ch1, + f"CREATE TABLE already_exists_2 ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database_name}/already_exists_1/{uuid}', 'r2') PARTITION BY toYYYYMM(D) ORDER BY A;", + ) + + set_convert_flags(ch1, database_name, ["already_exists_1"]) + + table_data_path = get_table_path(ch1, "already_exists_1", database_name) + + ch1.stop_clickhouse() + ch1.start_clickhouse(retry_start=False, expected_to_fail=True) + + # Check if we can cancel convertation + ch1.exec_in_container( + [ + "bash", + "-c", + f"rm {table_data_path}convert_to_replicated", + ] + ) + ch1.start_clickhouse() From 5f2228cd16dcdfc212450c214ef4014410e1b0cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9A=D0=B8=D1=80=D0=B8=D0=BB=D0=BB=20=D0=93=D0=B0=D1=80?= =?UTF-8?q?=D0=B1=D0=B0=D1=80?= Date: Mon, 13 May 2024 00:35:38 +0300 Subject: [PATCH 161/651] Fix test --- 
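Note on the flow this test exercises: before converting a table, the server
expands default_replica_path against the table's identity and refuses the
conversion when the resulting ZooKeeper path is already occupied. A minimal
sketch of that check, assuming the {database}/{table}/{uuid} macros from the
cluster config above (an illustrative condensation of the DatabaseOrdinary.cpp
hunk, not new behaviour):

    // Build the candidate replica path for the table about to be converted.
    Macros::MacroExpansionInfo info;
    info.table_id = StorageID(create_query->getDatabase(), create_query->getTable(), create_query->uuid);
    info.expand_special_macros_only = false;

    // "/clickhouse/tables/{database}/{table}/{uuid}" -> "/clickhouse/tables/db/tbl/<uuid>"
    String zookeeper_path = context->getMacros()->expand(server_settings.default_replica_path, info);

    // The path already belongs to an existing replica set: converting would
    // attach this table to foreign metadata, so the conversion is aborted.
    if (context->getZooKeeper()->exists(zookeeper_path))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Found existing ZooKeeper path {}", zookeeper_path);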
.../integration/test_modify_engine_on_restart/test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_modify_engine_on_restart/test.py b/tests/integration/test_modify_engine_on_restart/test.py index 8af7ec8ae83..81854340752 100644 --- a/tests/integration/test_modify_engine_on_restart/test.py +++ b/tests/integration/test_modify_engine_on_restart/test.py @@ -40,8 +40,8 @@ def started_cluster(): cluster.shutdown() -def q(node, query): - return node.query(database=database_name, sql=query) +def q(node, query, database=database_name): + return node.query(database=database, sql=query) def create_tables(): @@ -166,20 +166,24 @@ def test_modify_engine_on_restart(started_cluster): def test_modify_engine_fails_if_zk_path_exists(started_cluster): - ch1.query("CREATE DATABASE IF NOT EXISTS " + database_name + " ON CLUSTER cluster") + database_name = "zk_path" + ch1.query("CREATE DATABASE " + database_name + " ON CLUSTER cluster") q( ch1, "CREATE TABLE already_exists_1 ( A Int64, D Date, S String ) ENGINE MergeTree() PARTITION BY toYYYYMM(D) ORDER BY A;", + database_name, ) uuid = q( ch1, f"SELECT uuid FROM system.tables WHERE table = 'already_exists_1' and database = '{database_name}'", + database_name, ).strip("'[]\n") q( ch1, f"CREATE TABLE already_exists_2 ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database_name}/already_exists_1/{uuid}', 'r2') PARTITION BY toYYYYMM(D) ORDER BY A;", + database_name, ) set_convert_flags(ch1, database_name, ["already_exists_1"]) From 9789d130a6cad5da2941037d91c69d9d63aa2733 Mon Sep 17 00:00:00 2001 From: Danila Puzov Date: Mon, 13 May 2024 01:11:23 +0300 Subject: [PATCH 162/651] Tests and docs for generateSnowflakeID and fixes --- src/Functions/generateSnowflakeID.cpp | 144 +++++++++++++----- src/Functions/serial.cpp | 36 ++--- .../03129_serial_test_zookeeper.reference | 15 +- .../03129_serial_test_zookeeper.sql | 24 +-- .../03130_generate_snowflake_id.reference | 3 + .../03130_generate_snowflake_id.sql | 11 ++ 6 files changed, 154 insertions(+), 79 deletions(-) create mode 100644 tests/queries/0_stateless/03130_generate_snowflake_id.reference create mode 100644 tests/queries/0_stateless/03130_generate_snowflake_id.sql diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index dd837a58325..1decda0ab46 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -1,7 +1,11 @@ -#include #include +#include #include +#include #include +#include +#include + namespace DB { @@ -38,15 +42,32 @@ constexpr auto machine_seq_num_size = 12; constexpr int64_t timestamp_mask = ((1LL << timestamp_size) - 1) << (machine_id_size + machine_seq_num_size); constexpr int64_t machine_id_mask = ((1LL << machine_id_size) - 1) << machine_seq_num_size; constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_size) - 1; +constexpr int64_t max_machine_seq_num = machine_seq_num_mask; + +Int64 getMachineID() +{ + auto serverUUID = ServerUUID::get(); + + // hash serverUUID into 64 bits + Int64 h = UUIDHelpers::getHighBytes(serverUUID); + Int64 l = UUIDHelpers::getLowBytes(serverUUID); + return ((h * 11) ^ (l * 17)) & machine_id_mask; +} + +Int64 getTimestamp() +{ + const auto tm_point = std::chrono::system_clock::now(); + return std::chrono::duration_cast( + tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1); +} } class FunctionSnowflakeID : public IFunction { private: - mutable std::atomic state{0}; - // previous 
snowflake id - // state is 1 atomic value because we don't want use mutex + mutable std::atomic lowest_available_snowflake_id{0}; + // 1 atomic value because we don't want to use mutex public: static constexpr auto name = "generateSnowflakeID"; @@ -58,23 +79,19 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } - + bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return false; } bool useDefaultImplementationForNulls() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } bool isVariadic() const override { return true; } - bool isStateful() const override { return true; } - bool isDeterministic() const override { return false; } - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() > 1) { + if (!arguments.empty()) { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", + "Number of arguments for function {} doesn't match: passed {}, should be 0.", getName(), arguments.size()); } - return std::make_shared(); } @@ -83,36 +100,57 @@ public: { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - size_t size = input_rows_count; - vec_to.resize(size); + Int64 size64 = static_cast(input_rows_count); + vec_to.resize(input_rows_count); - auto serverUUID = ServerUUID::get(); + if (input_rows_count == 0) { + return col_res; + } - // hash serverUUID into 32 bytes - Int64 h = UUIDHelpers::getHighBytes(serverUUID); - Int64 l = UUIDHelpers::getLowBytes(serverUUID); - Int64 machine_id = ((h * 11) ^ (l * 17)) & machine_id_mask; + Int64 machine_id = getMachineID(); + Int64 current_timestamp = getTimestamp(); + Int64 current_machine_seq_num; - for (Int64 & el : vec_to) { - const auto tm_point = std::chrono::system_clock::now(); - Int64 current_timestamp = std::chrono::duration_cast( - tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1); + Int64 available_id, next_available_id; + do + { + available_id = lowest_available_snowflake_id.load(); + Int64 available_timestamp = (available_id & timestamp_mask) >> (machine_id_size + machine_seq_num_size); + Int64 available_machine_seq_num = available_id & machine_seq_num_mask; - Int64 last_state, new_state; - do { - last_state = state.load(); - Int64 last_timestamp = (last_state & timestamp_mask) >> (machine_id_size + machine_seq_num_size); - Int64 machine_seq_num = last_state & machine_seq_num_mask; + if (current_timestamp > available_timestamp) + { + current_machine_seq_num = 0; + } + else + { + current_timestamp = available_timestamp; + current_machine_seq_num = available_machine_seq_num; + } - if (current_timestamp == last_timestamp) { - ++machine_seq_num; - } - new_state = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | machine_seq_num; - } while (!state.compare_exchange_strong(last_state, new_state)); - // failed CAS => another thread updated state - // successful CAS => we have unique (timestamp, machine_seq_num) on this machine + // calculate new `lowest_available_snowflake_id` + Int64 new_timestamp; + Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); + if (size64 >= seq_nums_in_current_timestamp_left) { + new_timestamp = 
current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num; + } else { + new_timestamp = current_timestamp; + } + Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; + next_available_id = (new_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | new_machine_seq_num; + } + while (!lowest_available_snowflake_id.compare_exchange_strong(available_id, next_available_id)); + // failed CAS => another thread updated `lowest_available_snowflake_id` + // successful CAS => we have our range of exclusive values - el = new_state; + for (Int64 & el : vec_to) + { + el = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | current_machine_seq_num; + if (current_machine_seq_num++ == max_machine_seq_num) + { + current_machine_seq_num = 0; + ++current_timestamp; + } } return col_res; @@ -122,7 +160,41 @@ public: REGISTER_FUNCTION(GenerateSnowflakeID) { - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation + { + .description=R"( +Generates Snowflake ID -- unique identificators contains: +- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds +- The middle 10 bits are the machine ID. +- The last 12 bits decode to number of ids processed by the machine at the given millisecond. + +In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0. +This function guarantees strict monotony on 1 machine and differences in values obtained on different machines. +)", + .syntax = "generateSnowflakeID()", + .arguments{}, + .returned_value = "Column of Int64", + .examples{ + {"single call", "SELECT generateSnowflakeID();", R"( +┌─generateSnowflakeID()─┐ +│ 7195510166884597760 │ +└───────────────────────┘)"}, + {"column call", "SELECT generateSnowflakeID() FROM numbers(10);", R"( +┌─generateSnowflakeID()─┐ +│ 7195516038159417344 │ +│ 7195516038159417345 │ +│ 7195516038159417346 │ +│ 7195516038159417347 │ +│ 7195516038159417348 │ +│ 7195516038159417349 │ +│ 7195516038159417350 │ +│ 7195516038159417351 │ +│ 7195516038159417352 │ +│ 7195516038159417353 │ +└───────────────────────┘)"}, + }, + .categories{"Unique identifiers", "Snowflake ID"} + }); } } diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp index 1745e17b5e7..3da2f4ce218 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/serial.cpp @@ -1,18 +1,11 @@ -#include -#include -#include -#include +#include #include #include #include #include -#include "Common/Logger.h" -#include "Common/ZooKeeper/IKeeper.h" -#include "Common/ZooKeeper/KeeperException.h" -#include "Common/ZooKeeper/Types.h" -#include -namespace DB { +namespace DB +{ namespace ErrorCodes { @@ -62,30 +55,26 @@ public: throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()); - if (!isStringOrFixedString(arguments[0])) { + if (!isStringOrFixedString(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Type of argument for function {} doesn't match: passed {}, should be string", getName(), arguments[0]->getName()); - } return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (zk == nullptr) { + if (zk == nullptr) throw Exception(ErrorCodes::KEEPER_EXCEPTION, "ZooKeeper is not configured for function {}", getName()); - } - if (zk->expired()) { + if 
(zk->expired()) zk = context->getZooKeeper(); - } auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); size_t size = input_rows_count; - LOG_INFO(getLogger("Serial Function"), "Size = {}", size); vec_to.resize(size); const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); @@ -102,16 +91,19 @@ public: zk->createIfNotExists(counter_path, "1"); Coordination::Stat stat; - while (true) { + while (true) + { std::string counter_string = zk->get(counter_path, &stat); counter = std::stoll(counter_string); std::string updated_counter = std::to_string(counter + input_rows_count); Coordination::Error err = zk->trySet(counter_path, updated_counter); - if (err == Coordination::Error::ZOK) { + if (err == Coordination::Error::ZOK) + { // CAS is done break; } - if (err != Coordination::Error::ZBADVERSION) { + if (err != Coordination::Error::ZBADVERSION) + { throw Exception(ErrorCodes::KEEPER_EXCEPTION, "ZooKeeper trySet operation failed with unexpected error = {} in function {}", err, getName()); @@ -119,7 +111,8 @@ public: } // Make a result - for (auto& val : vec_to) { + for (auto& val : vec_to) + { val = counter; ++counter; } @@ -163,7 +156,6 @@ The server should be configured with a ZooKeeper. )"}}, .categories{"Unique identifiers"} }); - } } diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference index 60714f4064f..479030db4be 100644 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference @@ -1,8 +1,13 @@ 1 2 -1 3 3 3 -1 1 1 4 -1 2 2 5 -1 5 5 6 -1 4 4 7 1 +3 +4 +5 +6 +7 +1 1 +2 2 +3 3 +4 4 +5 5 diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql index 3eacd1ae908..c3395009477 100644 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql @@ -1,20 +1,12 @@ +-- Tags: zookeeper + SELECT serial('x'); SELECT serial('x'); +SELECT serial('y'); +SELECT serial('x') FROM numbers(5); -DROP TABLE IF EXISTS default.test_table; +SELECT serial(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT serial('x', 'y'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT serial(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -CREATE TABLE test_table -( - CounterID UInt32, - UserID UInt32, - ver UInt16 -) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/1-1/test_table', 'x', ver) -PARTITION BY CounterID -ORDER BY (CounterID, intHash32(UserID)) -SAMPLE BY intHash32(UserID); - -INSERT INTO test_table VALUES (1, 1, 1), (1, 2, 2), (1, 3, 3), (1, 4, 4), (1, 5, 5); - -SELECT *, serial('x') FROM test_table; - -SELECT serial('y'); \ No newline at end of file +SELECT serial('z'), serial('z') FROM numbers(5); diff --git a/tests/queries/0_stateless/03130_generate_snowflake_id.reference b/tests/queries/0_stateless/03130_generate_snowflake_id.reference new file mode 100644 index 00000000000..2049ba26379 --- /dev/null +++ b/tests/queries/0_stateless/03130_generate_snowflake_id.reference @@ -0,0 +1,3 @@ +1 +1 +10 diff --git a/tests/queries/0_stateless/03130_generate_snowflake_id.sql b/tests/queries/0_stateless/03130_generate_snowflake_id.sql new file mode 100644 index 00000000000..669814c9ecb --- /dev/null +++ b/tests/queries/0_stateless/03130_generate_snowflake_id.sql @@ -0,0 +1,11 @@ +SELECT 
bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; +SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; + +SELECT generateSnowflakeID(1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT count(*) +FROM +( + SELECT DISTINCT generateSnowflakeID() + FROM numbers(10) +) \ No newline at end of file From 8e63d2f795d4e653ff4885212919725a7bb6a074 Mon Sep 17 00:00:00 2001 From: v01dxyz Date: Mon, 13 May 2024 09:21:01 +0200 Subject: [PATCH 163/651] Compress STDOUT if redirected to file with a compression extension * Add a new member to ClientBase: default_output_compression_method * Move the code to get file path from file descriptor to a separate Common function. The stateless test is almost a copy-paste of 02001_compress_output_file. Fixes https://github.com/ClickHouse/ClickHouse/issues/63496 --- programs/client/Client.cpp | 2 +- programs/local/LocalServer.cpp | 2 +- src/Client/ClientBase.cpp | 10 +++++- src/Client/ClientBase.h | 3 +- src/Common/tryGetFileNameByFileDescriptor.cpp | 33 +++++++++++++++++++ src/Common/tryGetFileNameByFileDescriptor.h | 10 ++++++ src/Formats/FormatFactory.cpp | 22 ++++--------- .../03144_compress_stdout.reference | 2 ++ .../0_stateless/03144_compress_stdout.sh | 23 +++++++++++++ 9 files changed, 88 insertions(+), 19 deletions(-) create mode 100644 src/Common/tryGetFileNameByFileDescriptor.cpp create mode 100644 src/Common/tryGetFileNameByFileDescriptor.h create mode 100644 tests/queries/0_stateless/03144_compress_stdout.reference create mode 100755 tests/queries/0_stateless/03144_compress_stdout.sh diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 396cd3e646b..9ae5dd735ed 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1178,7 +1178,7 @@ void Client::processConfig() pager = config().getString("pager", ""); - setDefaultFormatsFromConfiguration(); + setDefaultFormatsAndCompressionFromConfiguration(); global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); global_context->setQueryKindInitial(); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 5f2a51406e1..f18c0306254 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -611,7 +611,7 @@ void LocalServer::processConfig() if (config().has("macros")) global_context->setMacros(std::make_unique(config(), "macros", log)); - setDefaultFormatsFromConfiguration(); + setDefaultFormatsAndCompressionFromConfiguration(); /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index bd4430648c5..61d95e6eb4c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -643,6 +644,9 @@ try bool extras_into_stdout = need_render_progress || logs_into_stdout; bool select_only_into_file = select_into_file && !select_into_file_and_stdout; + if (!out_file_buf && default_output_compression_method != CompressionMethod::None) + out_file_buf = wrapWriteBufferWithCompressionMethod(out_buf, default_output_compression_method, 3, 0); + /// It is not clear how to write progress and logs /// intermixed with data with parallel formatting. /// It may increase code complexity significantly. 
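    /// Net effect of the block above: redirecting the output of clickhouse-client
    /// or clickhouse-local to e.g. `result.gz` makes chooseCompressionMethod()
    /// pick gzip from the file extension (resolved from the STDOUT descriptor in
    /// setDefaultFormatsAndCompressionFromConfiguration below), and out_buf is
    /// wrapped in a compressing buffer with compression level 3.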
@@ -735,7 +739,7 @@ bool ClientBase::isRegularFile(int fd) return fstat(fd, &file_stat) == 0 && S_ISREG(file_stat.st_mode); } -void ClientBase::setDefaultFormatsFromConfiguration() +void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() { if (config().has("output-format")) { @@ -759,6 +763,10 @@ void ClientBase::setDefaultFormatsFromConfiguration() default_output_format = *format_from_file_name; else default_output_format = "TSV"; + + std::optional file_name = tryGetFileNameFromFileDescriptor(STDOUT_FILENO); + if (file_name) + default_output_compression_method = chooseCompressionMethod(*file_name, ""); } else if (is_interactive) { diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 64cbdbe8989..7a0489641c8 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -190,7 +190,7 @@ protected: /// Adjust some settings after command line options and config had been processed. void adjustSettings(); - void setDefaultFormatsFromConfiguration(); + void setDefaultFormatsAndCompressionFromConfiguration(); void initTTYBuffer(ProgressOption progress); @@ -224,6 +224,7 @@ protected: String pager; String default_output_format; /// Query results output format. + CompressionMethod default_output_compression_method = CompressionMethod::None; String default_input_format; /// Tables' format for clickhouse-local. bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering. diff --git a/src/Common/tryGetFileNameByFileDescriptor.cpp b/src/Common/tryGetFileNameByFileDescriptor.cpp new file mode 100644 index 00000000000..47e81050388 --- /dev/null +++ b/src/Common/tryGetFileNameByFileDescriptor.cpp @@ -0,0 +1,33 @@ +#include + +#ifdef OS_LINUX +# include +#elif defined(OS_DARWIN) +# include +#endif + +#include + + +namespace DB +{ +std::optional tryGetFileNameFromFileDescriptor(int fd) +{ +#ifdef OS_LINUX + std::string proc_path = fmt::format("/proc/self/fd/{}", fd); + char file_path[PATH_MAX] = {'\0'}; + if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) + return file_path; + return std::nullopt; +#elif defined(OS_DARWIN) + char file_path[PATH_MAX] = {'\0'}; + if (fcntl(fd, F_GETPATH, file_path) != -1) + return file_path; + return std::nullopt; +#else + (void)fd; + return std::nullopt; +#endif +} + +} diff --git a/src/Common/tryGetFileNameByFileDescriptor.h b/src/Common/tryGetFileNameByFileDescriptor.h new file mode 100644 index 00000000000..c38ccb4f851 --- /dev/null +++ b/src/Common/tryGetFileNameByFileDescriptor.h @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + +namespace DB +{ +/// Supports only Linux/MacOS. On other platforms, returns nullopt. 
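+/// On Linux the name is recovered by readlink()'ing /proc/self/fd/<fd>;
+/// on macOS via fcntl(F_GETPATH). Both are best-effort: any failure yields
+/// std::nullopt instead of throwing.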
+std::optional tryGetFileNameFromFileDescriptor(int fd); +} diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index b7e9899da46..783daba44fd 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -15,7 +16,7 @@ #include #include #include -#include +#include #include @@ -692,21 +693,12 @@ String FormatFactory::getFormatFromFileName(String file_name) std::optional FormatFactory::tryGetFormatFromFileDescriptor(int fd) { -#ifdef OS_LINUX - std::string proc_path = fmt::format("/proc/self/fd/{}", fd); - char file_path[PATH_MAX] = {'\0'}; - if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) - return tryGetFormatFromFileName(file_path); + std::optional file_name = tryGetFileNameFromFileDescriptor(fd); + + if (file_name) + return tryGetFormatFromFileName(*file_name); + return std::nullopt; -#elif defined(OS_DARWIN) - char file_path[PATH_MAX] = {'\0'}; - if (fcntl(fd, F_GETPATH, file_path) != -1) - return tryGetFormatFromFileName(file_path); - return std::nullopt; -#else - (void)fd; - return std::nullopt; -#endif } String FormatFactory::getFormatFromFileDescriptor(int fd) diff --git a/tests/queries/0_stateless/03144_compress_stdout.reference b/tests/queries/0_stateless/03144_compress_stdout.reference new file mode 100644 index 00000000000..6f51dfc24e1 --- /dev/null +++ b/tests/queries/0_stateless/03144_compress_stdout.reference @@ -0,0 +1,2 @@ +Hello, World! From client. +Hello, World! From local. diff --git a/tests/queries/0_stateless/03144_compress_stdout.sh b/tests/queries/0_stateless/03144_compress_stdout.sh new file mode 100755 index 00000000000..569754303a7 --- /dev/null +++ b/tests/queries/0_stateless/03144_compress_stdout.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +[ -e "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_client.gz ] && rm "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_client.gz + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM (SELECT 'Hello, World! From client.')" > ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client.gz +gunzip ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client.gz +cat ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client + +rm -f "${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client" + +[ -e "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_local.gz ] && rm "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_local.gz + +${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! 
From local.')" > ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local.gz +gunzip ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local.gz +cat ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local + +rm -f "${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local" From 6a94ba370a3a294f7f2b1471214be6ecfd6eaa7b Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Mon, 13 May 2024 09:43:03 +0200 Subject: [PATCH 164/651] Fix clang-tidy errors --- src/Functions/FunctionsHashing.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index bccdba5ee69..1091ec6c86f 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -94,8 +94,8 @@ namespace impl i = 0; if (offsets != nullptr) { - const auto begin = offsets->begin(); - auto upper = std::upper_bound(begin, offsets->end(), i); + const auto *const begin = offsets->begin(); + const auto * upper = std::upper_bound(begin, offsets->end(), i); if (upper == offsets->end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i); i = upper - begin; From f1f668e7df24190eaf4f1d67360b9e53099289d2 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 10 May 2024 14:15:01 +0200 Subject: [PATCH 165/651] Setup node generator initial --- utils/keeper-bench/Runner.cpp | 288 ++++++++++++++++++++++++++++++---- utils/keeper-bench/Runner.h | 3 + utils/keeper-bench/main.cpp | 2 + 3 files changed, 265 insertions(+), 28 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index a893dac3851..0050230b6ec 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,17 +1,22 @@ #include "Runner.h" #include -#include #include +#include +#include +#include #include "Common/ConcurrentBoundedQueue.h" +#include "Common/Exception.h" #include "Common/ZooKeeper/IKeeper.h" #include "Common/ZooKeeper/ZooKeeperArgs.h" #include "Common/ZooKeeper/ZooKeeperCommon.h" #include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include +#include "Coordination/KeeperSnapshotManager.h" #include "Core/ColumnWithTypeAndName.h" #include "Core/ColumnsWithTypeAndName.h" +#include #include "IO/ReadBuffer.h" #include "IO/ReadBufferFromFile.h" #include "base/Decimal.h" @@ -43,12 +48,14 @@ Runner::Runner( std::optional concurrency_, const std::string & config_path, const std::string & input_request_log_, + const std::string & setup_nodes_snapshot_path_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, std::optional continue_on_error_, std::optional max_iterations_) : input_request_log(input_request_log_) + , setup_nodes_snapshot_path(setup_nodes_snapshot_path_) , info(std::make_shared()) { @@ -381,18 +388,18 @@ struct ZooKeeperRequestBlock { explicit ZooKeeperRequestBlock(DB::Block block_) : block(std::move(block_)) - , hostname_idx(block.getPositionByName("hostname")) // - , request_event_time_idx(block.getPositionByName("request_event_time")) // - , thread_id_idx(block.getPositionByName("thread_id")) // - , session_id_idx(block.getPositionByName("session_id")) // - , xid_idx(block.getPositionByName("xid")) // + , hostname_idx(block.getPositionByName("hostname")) + , request_event_time_idx(block.getPositionByName("request_event_time")) + , thread_id_idx(block.getPositionByName("thread_id")) + , session_id_idx(block.getPositionByName("session_id")) + , xid_idx(block.getPositionByName("xid")) , 
has_watch_idx(block.getPositionByName("has_watch")) , op_num_idx(block.getPositionByName("op_num")) , path_idx(block.getPositionByName("path")) , data_idx(block.getPositionByName("data")) , is_ephemeral_idx(block.getPositionByName("is_ephemeral")) , is_sequential_idx(block.getPositionByName("is_sequential")) - , response_event_time_idx(block.getPositionByName("response_event_time")) // + , response_event_time_idx(block.getPositionByName("response_event_time")) , error_idx(block.getPositionByName("error")) , requests_size_idx(block.getPositionByName("requests_size")) , version_idx(block.getPositionByName("version")) @@ -519,6 +526,7 @@ struct RequestFromLog { Coordination::ZooKeeperRequestPtr request; std::optional expected_result; + std::vector> subrequest_expected_results; int64_t session_id = 0; size_t executor_id = 0; bool has_watch = false; @@ -586,7 +594,6 @@ struct ZooKeeperRequestFromLogReader idx_in_block = 0; } - request_from_log.expected_result = current_block->getError(idx_in_block); request_from_log.session_id = current_block->getSessionId(idx_in_block); request_from_log.has_watch = current_block->hasWatch(idx_in_block); @@ -693,6 +700,12 @@ struct ZooKeeperRequestFromLogReader if (!subrequest_from_log) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to fetch subrequest for {}, subrequest index {}", op_num, i); + if (!subrequest_from_log->expected_result && request_from_log.expected_result + && request_from_log.expected_result == Coordination::Error::ZOK) + { + subrequest_from_log->expected_result = Coordination::Error::ZOK; + } + requests.push_back(std::move(subrequest_from_log->request)); if (subrequest_from_log->session_id != request_from_log.session_id) @@ -700,6 +713,8 @@ struct ZooKeeperRequestFromLogReader if (subrequest_from_log->executor_id != request_from_log.executor_id) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Executor id mismatch for subrequest in {}, subrequest index {}", op_num, i); + + request_from_log.subrequest_expected_results.push_back(subrequest_from_log->expected_result); } request_from_log.request = std::make_shared(requests, default_acls); @@ -731,7 +746,6 @@ private: namespace { - struct RequestFromLogStats { struct Stats @@ -744,6 +758,192 @@ struct RequestFromLogStats Stats read_requests; }; +struct SetupNodeCollector +{ + explicit SetupNodeCollector(const std::string & setup_nodes_snapshot_path) + { + if (setup_nodes_snapshot_path.empty()) + return; + + keeper_context = std::make_shared(true, std::make_shared()); + keeper_context->setDigestEnabled(true); + keeper_context->setSnapshotDisk( + std::make_shared("Keeper-snapshots", setup_nodes_snapshot_path)); + + snapshot_manager.emplace(1, keeper_context); + auto snapshot_result = snapshot_manager->restoreFromLatestSnapshot(); + if (snapshot_result.storage == nullptr) + { + std::cerr << "No initial snapshot found" << std::endl; + initial_storage = std::make_unique( + /* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); + initial_storage->initializeSystemNodes(); + } + else + { + std::cerr << "Loaded initial nodes from snapshot" << std::endl; + initial_storage = std::move(snapshot_result.storage); + } + } + + void processRequest(const RequestFromLog & request_from_log) + { + if (!request_from_log.expected_result.has_value()) + return; + + auto process_request = [&](const Coordination::ZooKeeperRequest & request, const auto expected_result) + { + const auto & path = request.getPath(); + if (processed_paths.contains(path)) + return; + + 
auto op_num = request.getOpNum(); + + if (op_num == Coordination::OpNum::Create) + { + if (expected_result == Coordination::Error::ZNODEEXISTS) + { + addExpectedNode(path); + processed_paths.insert(path); + } + else if (expected_result == Coordination::Error::ZOK) + { + /// we need to make sure ancestors exist + auto position = path.find_last_of('/'); + if (position != 0) + { + auto parent_path = path.substr(0, position); + if (!processed_paths.contains(parent_path)) + { + addExpectedNode(parent_path); + processed_paths.insert(parent_path); + } + } + + processed_paths.insert(path); + } + } + else if (op_num == Coordination::OpNum::Remove) + { + if (expected_result == Coordination::Error::ZOK) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + else if (op_num == Coordination::OpNum::Set) + { + if (expected_result == Coordination::Error::ZOK) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + else if (op_num == Coordination::OpNum::Check) + { + if (expected_result == Coordination::Error::ZOK) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + else if (op_num == Coordination::OpNum::CheckNotExists) + { + if (expected_result == Coordination::Error::ZNODEEXISTS) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + else if (request.isReadRequest()) + { + if (expected_result == Coordination::Error::ZOK) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + }; + + const auto & request = request_from_log.request; + if (request->getOpNum() == Coordination::OpNum::Multi || request->getOpNum() == Coordination::OpNum::MultiRead) + { + const auto & multi_request = dynamic_cast(*request); + const auto & subrequests = multi_request.requests; + + for (size_t i = 0; i < subrequests.size(); ++i) + { + const auto & zookeeper_request = dynamic_cast(*subrequests[i]); + const auto subrequest_expected_result = request_from_log.subrequest_expected_results[i]; + if (subrequest_expected_result.has_value()) + process_request(zookeeper_request, *subrequest_expected_result); + + } + } + else + process_request(*request, *request_from_log.expected_result); + } + + void addExpectedNode(const std::string & path) + { + std::lock_guard lock(nodes_mutex); + + if (initial_storage->container.contains(path)) + return; + + std::cerr << "Adding expected node " << path << std::endl; + + Coordination::Requests create_ops; + + size_t pos = 1; + while (true) + { + pos = path.find('/', pos); + if (pos == std::string::npos) + break; + + auto request = zkutil::makeCreateRequest(path.substr(0, pos), "", zkutil::CreateMode::Persistent, true); + create_ops.emplace_back(request); + ++pos; + } + + auto request = zkutil::makeCreateRequest(path, "", zkutil::CreateMode::Persistent, true); + create_ops.emplace_back(request); + + auto next_zxid = initial_storage->getNextZXID(); + + static Coordination::ACLs default_acls = [] + { + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + return Coordination::ACLs{std::move(acl)}; + }(); + + auto multi_create_request = std::make_shared(create_ops, default_acls); + initial_storage->preprocessRequest(multi_create_request, 1, 0, next_zxid, /* check_acl = */ false); + auto responses = initial_storage->processRequest(multi_create_request, 1, next_zxid, /* check_acl = */ false); + if (responses.size() > 1 || responses[0].response->error != Coordination::Error::ZOK) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Invalid response after trying to 
create a node {}", responses[0].response->error); + } + + void generateSnapshot() + { + std::cerr << "Generating snapshot with starting data" << std::endl; + std::lock_guard lock(nodes_mutex); + DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(initial_storage->getZXID(), 1, std::make_shared()); + DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); + snapshot_manager->serializeSnapshotToDisk(snapshot); + } + + std::mutex nodes_mutex; + DB::KeeperContextPtr keeper_context; + Coordination::KeeperStoragePtr initial_storage; + std::unordered_set processed_paths; + std::optional snapshot_manager; +}; + void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_for_type) { std::cerr << fmt::format( @@ -751,7 +951,7 @@ void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_f type, stats_for_type.total, stats_for_type.unexpected_results, - static_cast(stats_for_type.unexpected_results) / stats_for_type.total * 100) + stats_for_type.total != 0 ? static_cast(stats_for_type.unexpected_results) / stats_for_type.total * 100 : 0.0) << std::endl; }; @@ -763,24 +963,40 @@ void requestFromLogExecutor(std::shared_ptr>(); last_request = request_promise->get_future(); - Coordination::ResponseCallback callback - = [&, request_promise, request = request_from_log.request, expected_result = request_from_log.expected_result]( - const Coordination::Response & response) mutable + Coordination::ResponseCallback callback = [&, + request_promise, + request = request_from_log.request, + expected_result = request_from_log.expected_result, + subrequest_expected_results = std::move(request_from_log.subrequest_expected_results)]( + const Coordination::Response & response) mutable { auto & stats = request->isReadRequest() ? 
request_stats.read_requests : request_stats.write_requests; stats.total.fetch_add(1, std::memory_order_relaxed); - if (*expected_result != response.error) - stats.unexpected_results.fetch_add(1, std::memory_order_relaxed); + if (expected_result) + { + if (*expected_result != response.error) + stats.unexpected_results.fetch_add(1, std::memory_order_relaxed); - //if (!expected_result) - // return; + if (*expected_result != response.error) + { + std::cerr << fmt::format( + "Unexpected result for {}\ngot {}, expected {}\n", request->toString(), response.error, *expected_result) + << std::endl; - //if (*expected_result != response.error) - // std::cerr << fmt::format( - // "Unexpected result for {}, got {}, expected {}", request->getOpNum(), response.error, *expected_result) - // << std::endl; + if (const auto * multi_response = dynamic_cast(&response)) + { + std::string subresponses; + for (size_t i = 0; i < multi_response->responses.size(); ++i) + { + subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); + } + + std::cerr << "Subresponses\n" << subresponses << std::endl; + } + } + } request_promise->set_value(); }; @@ -827,6 +1043,9 @@ void Runner::runBenchmarkFromLog() RequestFromLogStats stats; + std::optional setup_nodes_collector; + if (!setup_nodes_snapshot_path.empty()) + setup_nodes_collector.emplace(setup_nodes_snapshot_path); std::unordered_map>> executor_id_to_queue; @@ -850,7 +1069,7 @@ void Runner::runBenchmarkFromLog() return; } - auto executor_queue = std::make_shared>(std::numeric_limits().max()); + auto executor_queue = std::make_shared>(std::numeric_limits::max()); executor_id_to_queue.emplace(request.executor_id, executor_queue); auto scheduled = pool->trySchedule([&, executor_queue]() mutable { @@ -865,6 +1084,7 @@ void Runner::runBenchmarkFromLog() throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue"); }; + if (!setup_nodes_collector) { auto setup_connection = getConnection(connection_infos[0], 0); benchmark_context.startup(*setup_connection); @@ -875,14 +1095,26 @@ void Runner::runBenchmarkFromLog() delay_watch.restart(); while (auto request_from_log = request_reader.getNextRequest()) { - request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); - push_request(std::move(*request_from_log)); + if (setup_nodes_collector) + { + setup_nodes_collector->processRequest(*request_from_log); + } + else + { + request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); + push_request(std::move(*request_from_log)); + } if (delay > 0 && delay_watch.elapsedSeconds() > delay) { - dumpStats("Write", stats.write_requests); - dumpStats("Read", stats.read_requests); - std::cerr << std::endl; + if (setup_nodes_collector) + setup_nodes_collector->generateSnapshot(); + else + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + std::cerr << std::endl; + } delay_watch.restart(); } } @@ -906,7 +1138,7 @@ void Runner::runBenchmarkWithGenerator() for (size_t i = 0; i < concurrency; ++i) { auto thread_connections = connections; - pool->scheduleOrThrowOnError([this, connections_ = std::move(thread_connections)]() mutable { thread(connections_); }); + pool->scheduleOrThrowOnError([this, my_connections = std::move(thread_connections)]() mutable { thread(my_connections); }); } } catch (...) 
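For clarity on SetupNodeCollector::processRequest above: the per-opcode rules it
applies when deciding whether a path must be present in the starting snapshot
boil down to a single predicate. A condensed sketch (hypothetical helper, not
part of the patch; the successful-Create case additionally forces the parent
path to exist, which is handled separately):

    bool mustPreExist(Coordination::OpNum op, Coordination::Error expected)
    {
        using enum Coordination::OpNum;
        switch (op)
        {
            case Create:         return expected == Coordination::Error::ZNODEEXISTS;
            case Remove:
            case Set:
            case Check:          return expected == Coordination::Error::ZOK;
            case CheckNotExists: return expected == Coordination::Error::ZNODEEXISTS;
            default:             return expected == Coordination::Error::ZOK; // read requests
        }
    }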
diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index 0c646eb2166..c19a4d82898 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -27,6 +27,7 @@ public: void startup(Coordination::ZooKeeper & zookeeper); void cleanup(Coordination::ZooKeeper & zookeeper); + private: struct Node { @@ -54,6 +55,7 @@ public: std::optional concurrency_, const std::string & config_path, const std::string & input_request_log_, + const std::string & setup_nodes_snapshot_path_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, @@ -96,6 +98,7 @@ private: std::shared_ptr getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx); std::string input_request_log; + std::string setup_nodes_snapshot_path; size_t concurrency = 1; diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 45fc28f3bca..0b963abf406 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -38,6 +38,7 @@ int main(int argc, char *argv[]) ("help", "produce help message") ("config", value()->default_value(""), "yaml/xml file containing configuration") ("input-request-log", value()->default_value(""), "log of requests that will be replayed") + ("setup-nodes-snapshot-path", value()->default_value(""), "directory containing snapshots with starting state") ("concurrency,c", value(), "number of parallel queries") ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") ("iterations,i", value(), "amount of queries to be executed") @@ -60,6 +61,7 @@ int main(int argc, char *argv[]) Runner runner(valueToOptional(options["concurrency"]), options["config"].as(), options["input-request-log"].as(), + options["setup-nodes-snapshot-path"].as(), options["hosts"].as(), valueToOptional(options["time-limit"]), valueToOptional(options["report-delay"]), From 90b8ae0f0b2dcec76f750580f152f1e8e005b938 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 May 2024 19:39:31 +0200 Subject: [PATCH 166/651] Ignore global profiler if system.trace_log is not enabled Signed-off-by: Azat Khuzhin --- src/Common/ThreadStatus.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index ad96018a17e..71cd811b6f2 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -127,6 +127,11 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) { #if !defined(SANITIZER) && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) && !defined(__APPLE__) + /// profilers are useless without trace collector + auto global_context_ptr = global_context.lock(); + if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) + return; + try { if (global_profiler_real_time_period > 0) From 0b270a67cfe0d52dd3de8e62db0fc20e4725d723 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 May 2024 21:21:07 +0200 Subject: [PATCH 167/651] Fix disabling global profilers for keeper standalone build CLICKHOUSE_KEEPER_STANDALONE_BUILD does not set while compiling ThreadStatus.cpp, but it linked to the clickhouse-keeper standalone build, and before this patch it simply leads to the linking error [1]: May 10 20:02:58 ld.lld-17: error: undefined symbol: DB::Context::hasTraceCollector() const May 10 20:02:58 >>> referenced by ThreadStatus.cpp:132 
(./build_docker/./src/Common/ThreadStatus.cpp:132) May 10 20:02:58 >>> lto.tmp:(DB::ThreadStatus::initGlobalProfiler(unsigned long, unsigned long)) May 10 20:02:58 clang++-17: error: linker command failed with exit code 1 (use -v to see invocation) [1]: https://s3.amazonaws.com/clickhouse-test-reports/63632/643061bd9d7ef16641ea9537be868fc39d029726/clickhouse_build_check/report.html Signed-off-by: Azat Khuzhin --- src/Common/ThreadStatus.cpp | 2 +- src/Coordination/Standalone/Context.cpp | 5 +++++ src/Coordination/Standalone/Context.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 71cd811b6f2..aaf5618ca5e 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -126,7 +126,7 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) { -#if !defined(SANITIZER) && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) && !defined(__APPLE__) +#if !defined(SANITIZER) && !defined(__APPLE__) /// profilers are useless without trace collector auto global_context_ptr = global_context.lock(); if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 1095a11566f..36990d263b5 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -457,4 +457,9 @@ const ServerSettings & Context::getServerSettings() const return shared->server_settings; } +bool Context::hasTraceCollector() const +{ + return false; +} + } diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index ff85e032814..452989e9296 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -163,6 +163,8 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; const ServerSettings & getServerSettings() const; + + bool hasTraceCollector() const; }; } From 11f1d9a30effcccc23109a99cde3cae0c91f5612 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 09:09:09 +0200 Subject: [PATCH 168/651] Remove extra includes of ThreadPool.h in tests Signed-off-by: Azat Khuzhin --- src/Common/tests/gtest_rw_lock.cpp | 4 ++-- src/Databases/MySQL/tests/gtest_mysql_binlog.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp index 08a14aba8fb..d8c6e9cb99d 100644 --- a/src/Common/tests/gtest_rw_lock.cpp +++ b/src/Common/tests/gtest_rw_lock.cpp @@ -3,8 +3,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -541,7 +541,7 @@ TEST(Common, RWLockWriteLockTimeoutDuringWriteWithWaitingRead) events.add(wc ? 
"Locked wb" : "Failed to lock wb"); EXPECT_EQ(wc, nullptr); }); - + std::thread rc_thread([&] () { std::this_thread::sleep_for(std::chrono::duration(200)); diff --git a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp index df8433f7cce..11299c5b8b1 100644 --- a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp +++ b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp @@ -1,4 +1,3 @@ -#include #include #include #include From 4ad88d04b416b6fd8fd4c2d468108df164f9ba36 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 08:58:53 +0200 Subject: [PATCH 169/651] Move initGlobalProfiler() into ThreadStatusExt.cpp and fix examples To avoid undefined references in examples: May 11 01:58:40 ld.lld-17: error: undefined symbol: DB::Context::hasTraceCollector() const May 11 01:58:40 >>> referenced by ThreadStatus.cpp:132 (/build/src/Common/ThreadStatus.cpp:132) May 11 01:58:40 >>> ThreadStatus.cpp.o:(DB::ThreadStatus::initGlobalProfiler(unsigned long, unsigned long)) in archive src/libclickhouse_common_iod.a May 11 01:58:40 clang++-17: error: linker command failed with exit code 1 (use -v to see invocation) Move it firstly into ThreadStatusExt and then do not try to use it from the ThreadPool. Signed-off-by: Azat Khuzhin --- src/Common/ThreadPool.cpp | 2 ++ src/Common/ThreadPool.h | 5 ++++ src/Common/ThreadStatus.cpp | 25 ------------------- src/Common/examples/parallel_aggregation.cpp | 5 +++- src/Common/examples/parallel_aggregation2.cpp | 17 +++++++------ .../examples/thread_creation_latency.cpp | 8 +++--- src/Interpreters/ThreadStatusExt.cpp | 25 +++++++++++++++++++ 7 files changed, 51 insertions(+), 36 deletions(-) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index b9029d9287d..60c1e12bc2a 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -494,8 +494,10 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ template class ThreadPoolImpl; template class ThreadPoolImpl>; +template class ThreadPoolImpl>; template class ThreadFromGlobalPoolImpl; template class ThreadFromGlobalPoolImpl; +template class ThreadFromGlobalPoolImpl; std::unique_ptr GlobalThreadPool::the_instance; diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 0f1b609f899..4c2403ed6e3 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -242,6 +242,11 @@ public: if (unlikely(global_profiler_real_time_period != 0 || global_profiler_cpu_time_period != 0)) thread_status.initGlobalProfiler(global_profiler_real_time_period, global_profiler_cpu_time_period); } + else + { + UNUSED(global_profiler_real_time_period); + UNUSED(global_profiler_cpu_time_period); + } std::apply(function, arguments); }, diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index aaf5618ca5e..8719a9e093a 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -124,31 +124,6 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) #endif } -void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) -{ -#if !defined(SANITIZER) && !defined(__APPLE__) - /// profilers are useless without trace collector - auto global_context_ptr = global_context.lock(); - if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) - return; - - try - { - if (global_profiler_real_time_period > 0) - query_profiler_real = std::make_unique(thread_id, - /* period= */ 
static_cast(global_profiler_real_time_period)); - - if (global_profiler_cpu_time_period > 0) - query_profiler_cpu = std::make_unique(thread_id, - /* period= */ static_cast(global_profiler_cpu_time_period)); - } - catch (...) - { - tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); - } -#endif -} - ThreadGroupPtr ThreadStatus::getThreadGroup() const { chassert(current_thread == this); diff --git a/src/Common/examples/parallel_aggregation.cpp b/src/Common/examples/parallel_aggregation.cpp index 7094690a3a8..a7650ff1dc5 100644 --- a/src/Common/examples/parallel_aggregation.cpp +++ b/src/Common/examples/parallel_aggregation.cpp @@ -20,6 +20,9 @@ #include +using ThreadFromGlobalPoolSimple = ThreadFromGlobalPoolImpl; +using SimpleThreadPool = ThreadPoolImpl; + using Key = UInt64; using Value = UInt64; @@ -255,7 +258,7 @@ int main(int argc, char ** argv) std::cerr << std::fixed << std::setprecision(2); - ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads); + SimpleThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads); Source data(n); diff --git a/src/Common/examples/parallel_aggregation2.cpp b/src/Common/examples/parallel_aggregation2.cpp index e7136707dbd..a1cebdba469 100644 --- a/src/Common/examples/parallel_aggregation2.cpp +++ b/src/Common/examples/parallel_aggregation2.cpp @@ -20,6 +20,9 @@ #include +using ThreadFromGlobalPoolSimple = ThreadFromGlobalPoolImpl; +using SimpleThreadPool = ThreadPoolImpl; + using Key = UInt64; using Value = UInt64; using Source = std::vector; @@ -38,7 +41,7 @@ struct AggregateIndependent template static void NO_INLINE execute(const Source & data, size_t num_threads, std::vector> & results, Creator && creator, Updater && updater, - ThreadPool & pool) + SimpleThreadPool & pool) { results.reserve(num_threads); for (size_t i = 0; i < num_threads; ++i) @@ -76,7 +79,7 @@ struct AggregateIndependentWithSequentialKeysOptimization template static void NO_INLINE execute(const Source & data, size_t num_threads, std::vector> & results, Creator && creator, Updater && updater, - ThreadPool & pool) + SimpleThreadPool & pool) { results.reserve(num_threads); for (size_t i = 0; i < num_threads; ++i) @@ -124,7 +127,7 @@ struct MergeSequential template static void NO_INLINE execute(Map ** source_maps, size_t num_maps, Map *& result_map, Merger && merger, - ThreadPool &) + SimpleThreadPool &) { for (size_t i = 1; i < num_maps; ++i) { @@ -144,7 +147,7 @@ struct MergeSequentialTransposed /// In practice not better than usual. 
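Editor's note: the example rewrites above all follow one pattern: the pool type is selected through template parameters so the profiler hook is compiled out of standalone binaries entirely, and no profiler symbol is referenced at link time. A toy sketch of that compile-time switch (the actual ThreadFromGlobalPoolImpl flags are ClickHouse internals; the flag below is purely illustrative):

```cpp
#include <functional>
#include <iostream>
#include <thread>

/// Illustrative stand-in: a thread wrapper whose template flag decides at
/// compile time whether per-thread profiling is set up. With the flag off,
/// the profiler call is not compiled at all, so no symbol from the profiling
/// library is referenced — which is what lets examples link without dbms.
template <bool enable_profiler>
class ToyThread
{
public:
    explicit ToyThread(std::function<void()> fn)
        : impl([fn = std::move(fn)]
          {
              if constexpr (enable_profiler)
                  std::cout << "would call initGlobalProfiler() here\n";
              fn();
          })
    {
    }

    void join() { impl.join(); }

private:
    std::thread impl;
};

/// Analogous to the SimpleThreadPool / ThreadFromGlobalPoolImpl<false, false>
/// aliases introduced in the examples above.
using SimpleToyThread = ToyThread<false>;

int main()
{
    SimpleToyThread t([] { std::cout << "work\n"; });
    t.join();
}
```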
template static void NO_INLINE execute(Map ** source_maps, size_t num_maps, Map *& result_map, Merger && merger, - ThreadPool &) + SimpleThreadPool &) { std::vector iterators(num_maps); for (size_t i = 1; i < num_maps; ++i) @@ -177,7 +180,7 @@ struct MergeParallelForTwoLevelTable template static void NO_INLINE execute(Map ** source_maps, size_t num_maps, Map *& result_map, Merger && merger, - ThreadPool & pool) + SimpleThreadPool & pool) { for (size_t bucket = 0; bucket < Map::NUM_BUCKETS; ++bucket) pool.scheduleOrThrowOnError([&, bucket, num_maps] @@ -202,7 +205,7 @@ struct Work template static void NO_INLINE execute(const Source & data, size_t num_threads, Creator && creator, Updater && updater, Merger && merger, - ThreadPool & pool) + SimpleThreadPool & pool) { std::vector> intermediate_results; @@ -282,7 +285,7 @@ int main(int argc, char ** argv) std::cerr << std::fixed << std::setprecision(2); - ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads); + SimpleThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads); Source data(n); diff --git a/src/Common/examples/thread_creation_latency.cpp b/src/Common/examples/thread_creation_latency.cpp index 48a28488068..8732d0a97d1 100644 --- a/src/Common/examples/thread_creation_latency.cpp +++ b/src/Common/examples/thread_creation_latency.cpp @@ -14,6 +14,8 @@ int value = 0; static void f() { ++value; } static void * g(void *) { f(); return {}; } +using ThreadFromGlobalPoolSimple = ThreadFromGlobalPoolImpl; +using SimpleThreadPool = ThreadPoolImpl; namespace CurrentMetrics { @@ -72,7 +74,7 @@ int main(int argc, char ** argv) test(n, "Create and destroy ThreadPool each iteration", [] { - ThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 1); + SimpleThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 1); tp.scheduleOrThrowOnError(f); tp.wait(); }); @@ -93,7 +95,7 @@ int main(int argc, char ** argv) }); { - ThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 1); + SimpleThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 1); test(n, "Schedule job for Threadpool each iteration", [&tp] { @@ -103,7 +105,7 @@ int main(int argc, char ** argv) } { - ThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 128); + SimpleThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 128); test(n, "Schedule job for Threadpool with 128 threads each iteration", [&tp] { diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 2b8e8bef6d4..6607df8d9af 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -458,6 +458,31 @@ void ThreadStatus::resetPerformanceCountersLastUsage() taskstats->reset(); } +void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) +{ +#if !defined(SANITIZER) && !defined(__APPLE__) + /// profilers are useless without trace collector + auto global_context_ptr = global_context.lock(); + if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) + 
return; + + try + { + if (global_profiler_real_time_period > 0) + query_profiler_real = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_real_time_period)); + + if (global_profiler_cpu_time_period > 0) + query_profiler_cpu = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_cpu_time_period)); + } + catch (...) + { + tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); + } +#endif +} + void ThreadStatus::initQueryProfiler() { if (internal_thread) From d74328541767eba99f2699c5539cd79f11dd41e3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 09:42:00 +0200 Subject: [PATCH 170/651] Fix undefined symbol createFunctionBaseCast() during linking examples Signed-off-by: Azat Khuzhin --- src/Functions/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 11bcc948288..23f34828802 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -15,6 +15,8 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources checkHyperscanRegexp.cpp array/has.cpp CastOverloadResolver.cpp + # Provides dependency for cast - createFunctionBaseCast() + FunctionsConversion.cpp ) extract_into_parent_list(clickhouse_functions_headers dbms_headers IFunction.h From 44f77fe9f5881033b8484bbe8a7cac4ad940a5d3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 13:25:36 +0200 Subject: [PATCH 171/651] Link dbms for zookeeper examples Since it uses ZooKeeper, which has ThreadFromGlobalPool inside, which requires ThreadPool with the profiler enabled, which requires ThreadStatusExt.cpp, which is included only in dbms, not in clickhouse_common_io (like ThreadStatus.cpp). Error: FAILED: src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib ld.lld-17: error: undefined symbol: DB::ThreadStatus::initGlobalProfiler(unsigned long, unsigned long) >>> referenced by ThreadPool.h:243 (./src/Common/ThreadPool.h:243) >>> ZooKeeperImpl.cpp.o:(void std::__1::__function::__policy_invoker::__call_impl::ThreadFromGlobalPoolImpl> const&, zkutil::ZooKeeperArgs const&, std::__1::shared_ptr)::$_0>(Coordination::ZooKeeper::ZooKeeper(std::__1::vector> const&, zkutil::ZooKeeperArgs const&, std::__1::shared_ptr)::$_0&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*)) in archive src/Common/ZooKeeper/libclickhouse_common_zookeeper_no_log.a Another way of fixing it would be to provide a define with a default value for "is profiler enabled" for ThreadPool; that should work, but would be tricky.
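Editor's note: the failure mode this commit message describes is worth spelling out: the profiler call sits inside a template in a shared header, so the symbol reference is emitted in whichever translation unit instantiates the pool, while the definition lives only in dbms. A self-contained, single-file sketch of that shape (file names and the free function are made up for illustration; in the real tree the definition is ThreadStatus::initGlobalProfiler() in ThreadStatusExt.cpp):

```cpp
#include <iostream>

// "profiler.h" — the header-only part (think clickhouse_common_io):
// only a declaration; the definition lives in another library ("dbms").
void initGlobalProfiler();

template <typename F>
void runInPool(F f)
{
    // This reference is emitted in whichever translation unit instantiates
    // the template — e.g. an example binary that uses ZooKeeper. If that
    // binary does not link the library holding the definition, linking fails:
    //   ld.lld: error: undefined symbol: initGlobalProfiler()
    initGlobalProfiler();
    f();
}

// "dbms.cpp" — normally compiled into libdbms; adding `dbms` to
// target_link_libraries() of each example is what makes this definition
// visible to the linker. Here both parts live in one file so it runs.
void initGlobalProfiler()
{
    std::cout << "profiler initialized\n";
}

int main()
{
    runInPool([] { std::cout << "task\n"; });
}
```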
Signed-off-by: Azat Khuzhin --- src/Common/ZooKeeper/examples/CMakeLists.txt | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index a99fbe55dd8..c5a93f2701e 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -1,8 +1,16 @@ clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp) -target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(zkutil_test_commands PRIVATE + clickhouse_common_zookeeper_no_log + dbms) clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp) -target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression string_utils) +target_link_libraries(zkutil_test_commands_new_lib PRIVATE + clickhouse_common_zookeeper_no_log + clickhouse_compression + string_utils + dbms) clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp) -target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(zkutil_test_async PRIVATE + clickhouse_common_zookeeper_no_log + dbms) From 5d7d9e9e3485d41d70998c1bb9c0864910a64de0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 14:35:41 +0200 Subject: [PATCH 172/651] Provide ThreadStatus::initGlobalProfiler() for standalone keeper build Signed-off-by: Azat Khuzhin --- src/Coordination/Standalone/ThreadStatusExt.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp index 97f7287be8c..fc78233d9dc 100644 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ b/src/Coordination/Standalone/ThreadStatusExt.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -11,4 +12,8 @@ void CurrentThread::attachToGroup(const ThreadGroupPtr &) { } +void ThreadStatus::initGlobalProfiler(UInt64 /*global_profiler_real_time_period*/, UInt64 /*global_profiler_cpu_time_period*/) +{ +} + } From 7261f924bb671ceb9d2131175d558df7296ff217 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 15:54:02 +0200 Subject: [PATCH 173/651] Exclude FunctionsConversion from the large objects check for now Signed-off-by: Azat Khuzhin --- utils/check-style/check-large-objects.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 2122cca911e..e2266e89556 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -7,6 +7,8 @@ export LC_ALL=C # The "total" should be printed without localization TU_EXCLUDES=( AggregateFunctionUniq Aggregator + # FIXME: Exclude for now + FunctionsConversion ) if find $1 -name '*.o' | xargs wc -c | grep --regexp='\.o$' | sort -rn | awk '{ if ($1 > 50000000) print }' \ From a4e1ddc95ab34d51c3e2a3cc023abbbe1c6b24e5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 19:21:32 +0200 Subject: [PATCH 174/651] Link dbms to ZooKeeper examples Signed-off-by: Azat Khuzhin --- utils/zookeeper-cli/CMakeLists.txt | 4 +++- utils/zookeeper-dump-tree/CMakeLists.txt | 6 +++++- utils/zookeeper-remove-by-list/CMakeLists.txt | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index be8cf81320c..cad7164b775 100644 --- 
a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -1,4 +1,6 @@ clickhouse_add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp ${ClickHouse_SOURCE_DIR}/src/Client/LineReader.cpp) -target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(clickhouse-zookeeper-cli PRIVATE + clickhouse_common_zookeeper_no_log + dbms) diff --git a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt index 182cb65f194..85e4d18c19f 100644 --- a/utils/zookeeper-dump-tree/CMakeLists.txt +++ b/utils/zookeeper-dump-tree/CMakeLists.txt @@ -1,2 +1,6 @@ clickhouse_add_executable (zookeeper-dump-tree main.cpp ${SRCS}) -target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_io boost::program_options) +target_link_libraries(zookeeper-dump-tree PRIVATE + clickhouse_common_zookeeper_no_log + clickhouse_common_io + dbms + boost::program_options) diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt index 01965413d29..50aaed76110 100644 --- a/utils/zookeeper-remove-by-list/CMakeLists.txt +++ b/utils/zookeeper-remove-by-list/CMakeLists.txt @@ -1,2 +1,5 @@ clickhouse_add_executable (zookeeper-remove-by-list main.cpp ${SRCS}) -target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper_no_log boost::program_options) +target_link_libraries(zookeeper-remove-by-list PRIVATE + clickhouse_common_zookeeper_no_log + dbms + boost::program_options) From 599fce53e38933f3f6f3d9679e9df5fe7026e98a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 13 May 2024 11:03:58 +0200 Subject: [PATCH 175/651] Fix compiling FunctionsConversion.cpp (by properly passing -g0) CI reports [1]: May 11 20:27:25 FAILED: src/CMakeFiles/dbms.dir/Functions/FunctionsConversion.cpp.o May 11 20:27:25 prlimit --as=10000000000 --data=5000000000 --cpu=1800 /usr/bin/sccache /usr/bin/clang++-17 --target=riscv64-linux-gnu --sysroot=/build/cmake/linux/../../contrib/sysroot/linux-riscv64 -DANNOYLIB_MULTITHREADED_BUILD -DBOOST_ASIO_HAS_STD_INVOKE_RESULT=1 -DBOOST_ASIO_STANDALONE=1 -DBOOST_TIMER_ENABLE_DEPRECATED=1 -DCONFIGDIR=\"\" -DDUMMY_BACKTRACE -DENABLE_ANNOY -DENABLE_MULTITARGET_CODE=1 -DENABLE_USEARCH -DHAVE_BZLIB_H=1 -DHAVE_CONFIG_H -DHAVE_FUTIMESAT=1 -DHAVE_ICONV=1 -DHAVE_LIBLZMA=1 -DHAVE_LIBZSTD=1 -DHAVE_LIBZSTD_COMPRESSOR=1 -DHAVE_LINUX_FS_H=1 -DHAVE_LINUX_TYPES_H=1 -DHAVE_LZMA_H=1 -DHAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1 -DHAVE_SYS_STATFS_H=1 -DHAVE_ZLIB_H=1 -DHAVE_ZSTD_H=1 -DINCBIN_SILENCE_BITCODE_WARNING -DLIBSASL_EXPORTS=1 -DLZ4_DISABLE_DEPRECATE_WARNINGS=1 -DLZ4_FAST_DEC_LOOP=1 -DMAJOR_IN_SYSMACROS=1 -DOBSOLETE_CRAM_ATTR=1 -DOBSOLETE_DIGEST_ATTR=1 -DPLUGINDIR=\"\" -DPOCO_ENABLE_CPP11 -DPOCO_HAVE_FD_EPOLL -DPOCO_OS_FAMILY_UNIX -DSASLAUTHD_CONF_FILE_DEFAULT=\"\" -DSNAPPY_CODEC_AVAILABLE -DSTD_EXCEPTION_HAS_STACK_TRACE=1 -DUSE_CLICKHOUSE_THREADS=1 -DWITH_COVERAGE=0 -DWITH_GZFILEOP -DZLIB_COMPAT -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS -I/build/build_docker/includes/configs -I/build/src -I/build/build_docker/src -I/build/build_docker/src/Core/include -I/build/base/base/.. -I/build/build_docker/base/base/.. -I/build/contrib/cctz/include -I/build/contrib/re2 -I/build/base/pcg-random/. 
-I/build/contrib/libfiu/libfiu -I/build/contrib/libssh/include -I/build/build_docker/contrib/libssh/include -I/build/contrib/miniselect/include -I/build/contrib/zstd/lib -I/build/contrib/pocketfft -I/build/contrib/libarchive-cmake -I/build/contrib/libarchive/libarchive -I/build/build_docker/contrib/cyrus-sasl-cmake -I/build/contrib/lz4/lib -isystem /build/contrib/llvm-project/libcxx/include -isystem /build/contrib/llvm-project/libcxxabi/include -isystem /build/contrib/libunwind/include -isystem /build/contrib/libdivide-cmake/. -isystem /build/contrib/libdivide -isystem /build/contrib/jemalloc-cmake/include -isystem /build/contrib/llvm-project/llvm/include -isystem /build/build_docker/contrib/llvm-project/llvm/include -isystem /build/contrib/abseil-cpp -isystem /build/contrib/croaring/cpp -isystem /build/contrib/croaring/include -isystem /build/contrib/sparsehash-c11 -isystem /build/contrib/incbin -isystem /build/contrib/cityhash102/include -isystem /build/contrib/boost -isystem /build/base/poco/Net/include -isystem /build/base/poco/Foundation/include -isystem /build/base/poco/NetSSL_OpenSSL/include -isystem /build/base/poco/Crypto/include -isystem /build/contrib/openssl-cmake/linux_riscv64/include -isystem /build/contrib/openssl/include -isystem /build/base/poco/Util/include -isystem /build/base/poco/JSON/include -isystem /build/base/poco/XML/include -isystem /build/contrib/replxx/include -isystem /build/contrib/fmtlib-cmake/../fmtlib/include -isystem /build/contrib/magic_enum/include -isystem /build/contrib/double-conversion -isystem /build/contrib/dragonbox/include -isystem /build/contrib/zlib-ng -isystem /build/build_docker/contrib/zlib-ng-cmake -isystem /build/contrib/pdqsort -isystem /build/contrib/xz/src/liblzma/api -isystem /build/contrib/aws/src/aws-cpp-sdk-core/include -isystem /build/build_docker/contrib/aws-cmake/include -isystem /build/contrib/aws/generated/src/aws-cpp-sdk-s3/include -isystem /build/contrib/aws-c-auth/include -isystem /build/contrib/aws-c-common/include -isystem /build/contrib/aws-c-io/include -isystem /build/contrib/aws-crt-cpp/include -isystem /build/contrib/aws-c-mqtt/include -isystem /build/contrib/aws-c-sdkutils/include -isystem /build/contrib/azure/sdk/core/azure-core/inc -isystem /build/contrib/azure/sdk/identity/azure-identity/inc -isystem /build/contrib/azure/sdk/storage/azure-storage-common/inc -isystem /build/contrib/azure/sdk/storage/azure-storage-blobs/inc -isystem /build/contrib/snappy -isystem /build/build_docker/contrib/snappy-cmake -isystem /build/contrib/libbcrypt -isystem /build/contrib/msgpack-c/include -isystem /build/build_docker/contrib/liburing/src/include-compat -isystem /build/build_docker/contrib/liburing/src/include -isystem /build/contrib/liburing/src/include -isystem /build/contrib/fast_float/include -isystem /build/contrib/librdkafka-cmake/include -isystem /build/contrib/librdkafka/src -isystem /build/build_docker/contrib/librdkafka-cmake/auxdir -isystem /build/contrib/cppkafka/include -isystem /build/contrib/nats-io/src -isystem /build/contrib/nats-io/src/adapters -isystem /build/contrib/nats-io/src/include -isystem /build/contrib/nats-io/src/unix -isystem /build/contrib/libuv/include -isystem /build/contrib/krb5/src/include -isystem /build/build_docker/contrib/krb5-cmake/include -isystem /build/contrib/NuRaft/include -isystem /build/base/poco/MongoDB/include -isystem /build/base/poco/Redis/include -isystem /build/contrib/icu/icu4c/source/i18n -isystem /build/contrib/icu/icu4c/source/common -isystem 
/build/contrib/capnproto/c++/src -isystem /build/contrib/avro/lang/c++/api -isystem /build/contrib/google-protobuf/src -isystem /build/contrib/s2geometry/src -isystem /build/contrib/s2geometry-cmake -isystem /build/contrib/AMQP-CPP/include -isystem /build/contrib/AMQP-CPP -isystem /build/contrib/sqlite-amalgamation -isystem /build/contrib/rocksdb/include -isystem /build/contrib/libpqxx/include -isystem /build/contrib/libpq -isystem /build/contrib/libpq/include -isystem /build/contrib/libstemmer_c/include -isystem /build/contrib/wordnet-blast -isystem /build/contrib/lemmagen-c/include -isystem /build/contrib/ulid-c/include -isystem /build/contrib/simdjson/include -isystem /build/contrib/rapidjson/include -isystem /build/contrib/consistent-hashing -isystem /build/contrib/annoy/src -isystem /build/contrib/FP16/include -isystem /build/contrib/robin-map/include -isystem /build/contrib/SimSIMD-map/include -isystem /build/contrib/usearch/include --gcc-toolchain=/build/cmake/linux/../../contrib/sysroot/linux-riscv64 -fdiagnostics-color=always -Xclang -fuse-ctor-homing -Wno-enum-constexpr-conversion -fsized-deallocation -gdwarf-aranges -pipe -fasynchronous-unwind-tables -ffile-prefix-map=/build=. -ftime-trace -falign-functions=32 -ffp-contract=off -fdiagnostics-absolute-paths -fstrict-vtable-pointers -Wall -Wextra -Wframe-larger-than=65536 -Weverything -Wpedantic -Wno-zero-length-array -Wno-c++98-compat-pedantic -Wno-c++98-compat -Wno-c++20-compat -Wno-sign-conversion -Wno-implicit-int-conversion -Wno-implicit-int-float-conversion -Wno-ctad-maybe-unsupported -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-double-promotion -Wno-exit-time-destructors -Wno-float-equal -Wno-global-constructors -Wno-missing-prototypes -Wno-missing-variable-declarations -Wno-padded -Wno-switch-enum -Wno-undefined-func-template -Wno-unused-template -Wno-vla -Wno-weak-template-vtables -Wno-weak-vtables -Wno-thread-safety-negative -Wno-enum-constexpr-conversion -Wno-unsafe-buffer-usage -Wno-switch-default -O2 -g -DNDEBUG -O3 -g -fno-pie -std=c++23 -D OS_LINUX -Werror -Wno-deprecated-declarations -Wno-poison-system-directories -nostdinc++ -MD -MT src/CMakeFiles/dbms.dir/Functions/FunctionsConversion.cpp.o -MF src/CMakeFiles/dbms.dir/Functions/FunctionsConversion.cpp.o.d -o src/CMakeFiles/dbms.dir/Functions/FunctionsConversion.cpp.o -c /build/src/Functions/FunctionsConversion.cpp May 11 20:27:25 sccache: warning: The server looks like it shut down unexpectedly, compiling locally instead [1]: https://s3.amazonaws.com/clickhouse-test-reports/63632/e795e0e028d45b654e099dee136a44e7ac5ed627/clickhouse_special_build_check/report.html Signed-off-by: Azat Khuzhin --- src/Functions/CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 23f34828802..751e8cf5103 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -3,7 +3,7 @@ add_subdirectory(divide) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions .) 
-extract_into_parent_list(clickhouse_functions_sources dbms_sources +set(DBMS_FUNCTIONS IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp @@ -18,6 +18,7 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources # Provides dependency for cast - createFunctionBaseCast() FunctionsConversion.cpp ) +extract_into_parent_list(clickhouse_functions_sources dbms_sources ${DBMS_FUNCTIONS}) extract_into_parent_list(clickhouse_functions_headers dbms_headers IFunction.h FunctionFactory.h @@ -28,6 +29,10 @@ extract_into_parent_list(clickhouse_functions_headers dbms_headers ) add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources}) +if (OMIT_HEAVY_DEBUG_SYMBOLS) + target_compile_options(clickhouse_functions_obj PRIVATE "-g0") + set_source_files_properties(${DBMS_FUNCTIONS} PROPERTIES COMPILE_FLAGS "-g0") +endif() list (APPEND OBJECT_LIBS $) @@ -64,10 +69,6 @@ if (TARGET OpenSSL::Crypto) list (APPEND PUBLIC_LIBS OpenSSL::Crypto) endif() -if (OMIT_HEAVY_DEBUG_SYMBOLS) - target_compile_options(clickhouse_functions_obj PRIVATE "-g0") -endif() - if (TARGET ch_contrib::icu) list (APPEND PRIVATE_LIBS ch_contrib::icu) endif () From d468a0a57794d457dc29e5c92d1733b2df0237dc Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 13 May 2024 12:00:15 +0000 Subject: [PATCH 176/651] Add archives reading support to s3 --- docker/test/stateless/setup_minio.sh | 2 +- docs/en/sql-reference/table-functions/s3.md | 19 + src/IO/S3/URI.cpp | 50 +- src/IO/S3/URI.h | 7 + .../DataLakes/DeltaLakeMetadataParser.cpp | 1 + src/Storages/DataLakes/IStorageDataLake.h | 4 +- .../DataLakes/Iceberg/IcebergMetadata.cpp | 9 +- .../DataLakes/Iceberg/StorageIceberg.h | 2 +- src/Storages/DataLakes/S3MetadataReader.cpp | 1 + src/Storages/S3Queue/StorageS3Queue.cpp | 20 +- src/Storages/StorageS3.cpp | 977 +++++++++++------- src/Storages/StorageS3.h | 158 ++- src/Storages/StorageS3Cluster.cpp | 9 +- src/TableFunctions/TableFunctionS3.cpp | 14 +- .../03036_reading_s3_archives.reference | 52 + .../0_stateless/03036_reading_s3_archives.sql | 22 + .../0_stateless/data_minio/03036_archive1.tar | Bin 0 -> 10240 bytes .../0_stateless/data_minio/03036_archive1.zip | Bin 0 -> 372 bytes .../0_stateless/data_minio/03036_archive2.tar | Bin 0 -> 10240 bytes .../0_stateless/data_minio/03036_archive2.zip | Bin 0 -> 372 bytes .../data_minio/03036_archive3.tar.gz | Bin 0 -> 185 bytes .../03036_compressed_file_archive.zip | Bin 0 -> 231 bytes 22 files changed, 920 insertions(+), 427 deletions(-) create mode 100644 tests/queries/0_stateless/03036_reading_s3_archives.reference create mode 100644 tests/queries/0_stateless/03036_reading_s3_archives.sql create mode 100644 tests/queries/0_stateless/data_minio/03036_archive1.tar create mode 100644 tests/queries/0_stateless/data_minio/03036_archive1.zip create mode 100644 tests/queries/0_stateless/data_minio/03036_archive2.tar create mode 100644 tests/queries/0_stateless/data_minio/03036_archive2.zip create mode 100644 tests/queries/0_stateless/data_minio/03036_archive3.tar.gz create mode 100644 tests/queries/0_stateless/data_minio/03036_compressed_file_archive.zip diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index c756ce4669d..2b9433edd20 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -83,7 +83,7 @@ setup_minio() { ./mc alias set clickminio http://localhost:11111 clickhouse clickhouse ./mc admin user add clickminio test testtest ./mc admin policy set clickminio 
readwrite user=test - ./mc mb clickminio/test + ./mc mb --ignore-existing clickminio/test if [ "$test_type" = "stateless" ]; then ./mc policy set public clickminio/test fi diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 970b3e52882..38d77a98749 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -248,6 +248,25 @@ FROM s3( LIMIT 5; ``` + +## Working with archives + +Suppose that we have several archive files with the following URIs on S3: + +- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-10.csv.zip' +- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-11.csv.zip' +- 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-12.csv.zip' + +Extracting data from these archives is possible using `::`. Globs can be used both in the URL part and in the part after `::` (which names the file inside the archive). + +``` sql +SELECT * +FROM s3( + 'https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m-2018-01-1{0..2}.csv.zip :: *.csv' +); +``` + + ## Virtual Columns {#virtual-columns} - `_path` — Path to the file. Type: `LowCardinalty(String)`. diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 4e679e6c477..4bf7a3ddf86 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -1,8 +1,7 @@ #include -#include -#include "Common/Macros.h" #include #include +#include "Common/Macros.h" #if USE_AWS_S3 #include #include @@ -55,7 +54,11 @@ URI::URI(const std::string & uri_) static constexpr auto OSS = "OSS"; static constexpr auto EOS = "EOS"; - uri = Poco::URI(uri_); + if (containsArchive(uri_)) + std::tie(uri_str, archive_pattern) = getPathToArchiveAndArchivePattern(uri_); + else + uri_str = uri_; + uri = Poco::URI(uri_str); std::unordered_map mapper; auto context = Context::getGlobalContextInstance(); @@ -126,9 +129,10 @@ URI::URI(const std::string & uri_) boost::to_upper(name); /// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", - quoteString(name)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", + quoteString(name)); if (name == COS) storage_name = COSN; @@ -156,10 +160,40 @@ void URI::validateBucket(const String & bucket, const Poco::URI & uri) /// S3 specification requires at least 3 and at most 63 characters in bucket name. /// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html if (bucket.length() < 3 || bucket.length() > 63) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}", - quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : ""); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}", + quoteString(bucket), + !uri.empty() ?
" (" + uri.toString() + ")" : ""); } +bool URI::containsArchive(const std::string & source) +{ + size_t pos = source.find("::"); + return (pos != std::string::npos); +} + +std::pair URI::getPathToArchiveAndArchivePattern(const std::string & source) +{ + size_t pos = source.find("::"); + assert(pos != std::string::npos); + + std::string path_to_archive = source.substr(0, pos); + while ((!path_to_archive.empty()) && path_to_archive.ends_with(' ')) + path_to_archive.pop_back(); + + if (path_to_archive.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); + + std::string_view path_in_archive_view = std::string_view{source}.substr(pos + 2); + while (path_in_archive_view.front() == ' ') + path_in_archive_view.remove_prefix(1); + + if (path_in_archive_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); + + return {path_to_archive, std::string{path_in_archive_view}}; +} } } diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 06b7d03aa8c..c52e6bc1441 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "config.h" @@ -28,6 +29,8 @@ struct URI std::string key; std::string version_id; std::string storage_name; + std::optional archive_pattern; + std::string uri_str; bool is_virtual_hosted_style; @@ -36,6 +39,10 @@ struct URI void addRegionToURI(const std::string & region); static void validateBucket(const std::string & bucket, const Poco::URI & uri); + +private: + bool containsArchive(const std::string & source); + std::pair getPathToArchiveAndArchivePattern(const std::string & source); }; } diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 14a912a180d..1687a4754f5 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace fs = std::filesystem; diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index 711abbde38c..2147f2c9e6b 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -54,7 +54,7 @@ public: { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); - return Storage::getConfiguration(); + return Storage::getConfigurationCopy(); } void updateConfiguration(const ContextPtr & local_context) override @@ -106,7 +106,7 @@ private: const bool updated = base_configuration.update(local_context); auto new_keys = getDataFiles(base_configuration, local_context); - if (!updated && new_keys == Storage::getConfiguration().keys) + if (!updated && new_keys == Storage::getConfigurationCopy().keys) return; Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys)); diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp index df1536f53fc..883b2a01dc5 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp @@ -31,16 +31,17 @@ #include #include +#include namespace DB { namespace ErrorCodes { - extern const int FILE_DOESNT_EXIST; - extern const int ILLEGAL_COLUMN; - extern const int BAD_ARGUMENTS; - extern const int UNSUPPORTED_METHOD; +extern const int FILE_DOESNT_EXIST; +extern const int ILLEGAL_COLUMN; +extern const int BAD_ARGUMENTS; +extern const int UNSUPPORTED_METHOD; } IcebergMetadata::IcebergMetadata( 
diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h index 45cbef0b41b..9e3885124d6 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -63,7 +63,7 @@ public: { std::lock_guard lock(configuration_update_mutex); updateConfigurationImpl(local_context); - return StorageS3::getConfiguration(); + return StorageS3::getConfigurationCopy(); } void updateConfiguration(const ContextPtr & local_context) override diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp index d66e21550a3..62a486951fe 100644 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ b/src/Storages/DataLakes/S3MetadataReader.cpp @@ -10,6 +10,7 @@ #include #include +#include namespace DB { diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index b6daadf8bc4..d3449884b3c 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -1,3 +1,4 @@ +#include #include "config.h" #if USE_AWS_S3 @@ -372,7 +373,11 @@ std::shared_ptr StorageS3Queue::createSource( auto configuration_snapshot = updateConfigurationAndGetCopy(local_context); auto internal_source = std::make_unique( - info, configuration.format, getName(), local_context, format_settings, + info, + configuration.format, + getName(), + local_context, + format_settings, max_block_size, configuration_snapshot.request_settings, configuration_snapshot.compression_method, @@ -380,7 +385,9 @@ std::shared_ptr StorageS3Queue::createSource( configuration_snapshot.url.bucket, configuration_snapshot.url.version_id, configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()), - file_iterator, local_context->getSettingsRef().max_download_threads, false); + file_iterator, + local_context->getSettingsRef().max_download_threads, + false); auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client, blob_storage_log = BlobStorageLogWriter::create()](const std::string & path) mutable { @@ -611,8 +618,13 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { auto glob_iterator = std::make_unique( - *configuration.client, configuration.url, predicate, getVirtualsList(), local_context, - /* read_keys */nullptr, configuration.request_settings); + *configuration.client, + configuration.url, + predicate, + getVirtualsList(), + local_context, + /* read_keys */ nullptr, + configuration.request_settings); return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 56417369869..ea4afb52572 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1,4 +1,14 @@ -#include "config.h" +#include +#include +#include +#include +#include +#include +#include "Common/logger_useful.h" +#include "IO/CompressionMethod.h" +#include "IO/ReadBuffer.h" +#include "Interpreters/Context_fwd.h" +#include "Storages/MergeTree/ReplicatedMergeTreePartHeader.h" #if USE_AWS_S3 @@ -158,10 +168,11 @@ public: , storage(storage_) , read_from_format_info(std::move(read_from_format_info_)) , need_only_count(need_only_count_) + , query_configuration(storage.getConfigurationCopy()) , 
max_block_size(max_block_size_) , num_streams(num_streams_) { - query_configuration = storage.updateConfigurationAndGetCopy(context); + query_configuration.update(context); virtual_columns = storage.getVirtualsList(); } @@ -204,7 +215,8 @@ public: , virtual_columns(virtual_columns_) , read_keys(read_keys_) , request_settings(request_settings_) - , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) + , list_objects_pool( + CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) , list_objects_scheduler(threadPoolCallbackRunnerUnsafe(list_objects_pool, "ListObjects")) , file_progress_callback(file_progress_callback_) { @@ -474,7 +486,8 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( KeysWithInfo * read_keys_, const S3Settings::RequestSettings & request_settings_, std::function file_progress_callback_) - : pimpl(std::make_shared(client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) + : pimpl(std::make_shared( + client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) { } @@ -562,8 +575,7 @@ StorageS3Source::KeysIterator::KeysIterator( KeysWithInfo * read_keys, std::function file_progress_callback_) : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, request_settings_, - read_keys, file_progress_callback_)) + client_, version_id_, keys_, bucket_, request_settings_, read_keys, file_progress_callback_)) { } @@ -593,7 +605,7 @@ StorageS3Source::ReadTaskIterator::ReadTaskIterator( pool.wait(); buffer.reserve(max_threads_count); for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); + buffer.emplace_back(std::make_shared(key_future.get())); } StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT @@ -618,6 +630,124 @@ size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() return buffer.size(); } + +StorageS3Source::ArchiveIterator::ArchiveIterator( + std::unique_ptr basic_iterator_, + const std::string & archive_pattern_, + std::shared_ptr client_, + const String & bucket_, + const String & version_id_, + const S3Settings::RequestSettings & request_settings_, + ContextPtr context_, + KeysWithInfo * read_keys_) + : WithContext(context_) + , basic_iterator(std::move(basic_iterator_)) + , basic_key_with_info_ptr(nullptr) + , client(client_) + , bucket(bucket_) + , version_id(version_id_) + , request_settings(request_settings_) + , read_keys(read_keys_) +{ + if (archive_pattern_.find_first_of("*?{") != std::string::npos) + { + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern_)); + if (!matcher->ok()) + throw Exception( + ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", archive_pattern_, matcher->error()); + filter = IArchiveReader::NameFilter{[matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }}; + } + else + { + path_in_archive = archive_pattern_; + } +} + +StorageS3Source::KeyWithInfoPtr StorageS3Source::ArchiveIterator::next(size_t) +{ + if (!path_in_archive.empty()) + { + std::unique_lock lock{take_next_mutex}; + while (true) + { + basic_key_with_info_ptr = basic_iterator->next(); + if (!basic_key_with_info_ptr) + return {}; + refreshArchiveReader(); + bool file_exists = archive_reader->fileExists(path_in_archive); + 
if (file_exists) + { + KeyWithInfoPtr archive_key_with_info + = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, path_in_archive, archive_reader); + if (read_keys != nullptr) + read_keys->push_back(archive_key_with_info); + return archive_key_with_info; + } + } + } + else + { + std::unique_lock lock{take_next_mutex}; + while (true) + { + if (!file_enumerator) + { + basic_key_with_info_ptr = basic_iterator->next(); + if (!basic_key_with_info_ptr) + return {}; + refreshArchiveReader(); + file_enumerator = archive_reader->firstFile(); + if (!file_enumerator) + { + file_enumerator.reset(); + continue; + } + } + else if (!file_enumerator->nextFile()) + { + file_enumerator.reset(); + continue; + } + + String current_filename = file_enumerator->getFileName(); + bool satisfies = filter(current_filename); + if (satisfies) + { + KeyWithInfoPtr archive_key_with_info + = std::make_shared(basic_key_with_info_ptr->key, std::nullopt, current_filename, archive_reader); + if (read_keys != nullptr) + read_keys->push_back(archive_key_with_info); + return archive_key_with_info; + } + } + } +} + +size_t StorageS3Source::ArchiveIterator::estimatedKeysCount() +{ + return basic_iterator->estimatedKeysCount(); +} + +void StorageS3Source::ArchiveIterator::refreshArchiveReader() +{ + if (basic_key_with_info_ptr) + { + if (!basic_key_with_info_ptr->info) + { + basic_key_with_info_ptr->info = S3::getObjectInfo(*client, bucket, basic_key_with_info_ptr->key, version_id, request_settings); + } + archive_reader = createArchiveReader( + basic_key_with_info_ptr->key, + [key = basic_key_with_info_ptr->key, archive_size = basic_key_with_info_ptr->info.value().size, context = getContext(), this]() + { return createS3ReadBuffer(key, archive_size, context, client, bucket, version_id, request_settings); }, + basic_key_with_info_ptr->info.value().size); + } + else + { + archive_reader = nullptr; + } +} + StorageS3Source::StorageS3Source( const ReadFromFormatInfo & info, const String & format_, @@ -653,7 +783,8 @@ StorageS3Source::StorageS3Source( , file_iterator(file_iterator_) , max_parsing_threads(max_parsing_threads_) , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) + , create_reader_pool( + CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(create_reader_pool, "CreateS3Reader")) { } @@ -699,9 +830,18 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) } else { - auto compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); - read_buf = createS3ReadBuffer(key_with_info->key, key_with_info->info->size); - + auto compression_method = CompressionMethod::None; + if (!key_with_info->path_in_archive.has_value()) + { + compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); + read_buf = createS3ReadBuffer( + key_with_info->key, key_with_info->info->size, getContext(), client, bucket, version_id, request_settings); + } + else + { + compression_method = chooseCompressionMethod(key_with_info->path_in_archive.value(), compression_hint); + read_buf = key_with_info->archive_reader->readFile(key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); + } auto input_format = FormatFactory::instance().getInput( format, *read_buf, @@ -753,12 +893,20 @@ std::future 
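Editor's note: ArchiveIterator above operates in two modes. With a fixed in-archive path it probes each archive for that one file; with a glob it enumerates every entry and keeps those accepted by the compiled filter. A toy model of those two modes over in-memory archives (names are illustrative; the real iterator reads entries through IArchiveReader and its file enumerator):

```cpp
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <utility>
#include <vector>

using NameFilter = std::function<bool(const std::string &)>;

struct Archive
{
    std::string key;                   /// S3 key of the archive itself
    std::vector<std::string> entries;  /// file names inside the archive
};

/// Returns (archive key, file inside archive) pairs, mimicking the two modes.
std::vector<std::pair<std::string, std::string>> listFiles(
    const std::vector<Archive> & archives,
    const std::optional<std::string> & exact_path,
    const NameFilter & filter)
{
    std::vector<std::pair<std::string, std::string>> result;
    for (const auto & archive : archives)
    {
        for (const auto & entry : archive.entries)
        {
            if (exact_path ? (entry == *exact_path)   /// fixed-path probe
                           : filter(entry))           /// glob enumeration
                result.emplace_back(archive.key, entry);
        }
    }
    return result;
}

int main()
{
    std::vector<Archive> archives = {
        {"03036_archive1.zip", {"example1.csv", "example2.csv"}},
        {"03036_archive2.zip", {"example1.csv", "example3.csv"}},
    };
    for (const auto & [key, file] : listFiles(
             archives, std::nullopt,
             [](const std::string & n) { return n.ends_with(".csv"); }))
        std::cout << key << " :: " << file << '\n';
}
```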
StorageS3Source::createReaderAsync(si return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); } -std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) +std::unique_ptr createS3ReadBuffer( + const String & key, + size_t object_size, + std::shared_ptr context, + std::shared_ptr client_ptr, + const String & bucket, + const String & version_id, + const S3Settings::RequestSettings & request_settings) { - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + auto read_settings = context->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; + auto download_buffer_size = context->getSettings().max_download_buffer_size; const bool object_too_small = object_size <= 2 * download_buffer_size; + static LoggerPtr log = getLogger("StorageS3Source"); // Create a read buffer that will prefetch the first ~1 MB of the file. // When reading lots of tiny files, this prefetching almost doubles the throughput. @@ -766,33 +914,46 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return createAsyncS3ReadBuffer(key, read_settings, object_size); + return createAsyncS3ReadBuffer(key, read_settings, object_size, context, client_ptr, bucket, version_id, request_settings); } + return std::make_unique( - client, bucket, key, version_id, request_settings, read_settings, - /*use_external_buffer*/ false, /*offset_*/ 0, /*read_until_position_*/ 0, - /*restricted_seek_*/ false, object_size); + client_ptr, + bucket, + key, + version_id, + request_settings, + read_settings, + /*use_external_buffer*/ false, + /*offset_*/ 0, + /*read_until_position_*/ 0, + /*restricted_seek_*/ false, + object_size); } -std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) +std::unique_ptr createAsyncS3ReadBuffer( + const String & key, + const ReadSettings & read_settings, + size_t object_size, + std::shared_ptr context, + std::shared_ptr client_ptr, + const String & bucket, + const String & version_id, + const S3Settings::RequestSettings & request_settings) { - auto context = getContext(); - auto read_buffer_creator = - [this, read_settings, object_size] - (bool restricted_seek, const StoredObject & object) -> std::unique_ptr + auto read_buffer_creator = [=](bool restricted_seek, const StoredObject & object) -> std::unique_ptr { return std::make_unique( - client, + client_ptr, bucket, object.remote_path, version_id, request_settings, read_settings, - /* use_external_buffer */true, - /* offset */0, - /* read_until_position */0, + /* use_external_buffer */ true, + /* offset */ 0, + /* read_until_position */ 0, restricted_seek, object_size); }; @@ -809,12 +970,12 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, "", read_settings, - /* cache_log */nullptr, /* use_external_buffer */true); + /* cache_log */ nullptr, + /* use_external_buffer */ true); auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); auto async_reader = std::make_unique( - std::move(s3_impl), pool_reader, modified_settings, - 
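Editor's note: the buffer choice in createS3ReadBuffer() comes down to one predicate: thread-pool reads plus a sufficiently small object select the asynchronous, prefetching buffer, which the comment above credits with roughly doubling throughput on lots of tiny files. A tiny sketch of just that rule (the 1 MiB buffer size below is an assumption for the demo, not the server's default):

```cpp
#include <cstddef>
#include <iostream>

/// Sketch of the selection rule: small objects read via the thread pool get
/// an asynchronous buffer that prefetches the start of the file.
bool usePrefetchingBuffer(size_t object_size, size_t download_buffer_size, bool threadpool_reads)
{
    const bool object_too_small = object_size <= 2 * download_buffer_size;
    return threadpool_reads && object_too_small;
}

int main()
{
    const size_t download_buffer_size = 1 << 20;                                  // assume 1 MiB
    std::cout << usePrefetchingBuffer(512 * 1024, download_buffer_size, true)     // 1: prefetch
              << usePrefetchingBuffer(64UL << 20, download_buffer_size, true)     // 0: plain buffer
              << '\n';
}
```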
context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); + std::move(s3_impl), pool_reader, modified_settings, context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); async_reader->setReadUntilEnd(); if (read_settings.remote_fs_prefetch) @@ -855,12 +1016,17 @@ Chunk StorageS3Source::generate() if (const auto * input_format = reader.getInputFormat()) chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize()); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, + requested_virtual_columns, + reader.getPath(), + reader.getFileSize(), + reader.isArchive() ? (&reader.getFile()) : nullptr); return chunk; } if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getFile(), total_rows_in_file); + addNumRowsToCache(reader.getFileExtended(), total_rows_in_file); total_rows_in_file = 0; @@ -890,10 +1056,7 @@ std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo { String source = fs::path(url_host_and_port) / bucket / key_with_info.key; auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - return key_with_info.info->last_modification_time; - }; + auto get_last_mod_time = [&]() -> std::optional { return key_with_info.info->last_modification_time; }; return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); } @@ -910,9 +1073,7 @@ public: const StorageS3::Configuration & configuration_, const String & bucket, const String & key) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) + : SinkToStorage(sample_block_), sample_block(sample_block_), format_settings(format_settings_) { BlobStorageLogWriterPtr blob_log = nullptr; if (auto blob_storage_log = context->getBlobStorageLog()) @@ -1013,32 +1174,36 @@ private: namespace { - std::optional checkAndGetNewFileOnInsertIfNeeded(const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number) + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const ContextPtr & context, const StorageS3::Configuration & configuration, const String & key, size_t sequence_number) +{ + if (context->getSettingsRef().s3_truncate_on_insert + || !S3::objectExists( + *configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings)) + return std::nullopt; + + if (context->getSettingsRef().s3_create_new_file_on_insert) { - if (context->getSettingsRef().s3_truncate_on_insert || !S3::objectExists(*configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings)) - return std::nullopt; - - if (context->getSettingsRef().s3_create_new_file_on_insert) + auto pos = key.find_first_of('.'); + String new_key; + do { - auto pos = key.find_first_of('.'); - String new_key; - do - { - new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); - ++sequence_number; - } - while (S3::objectExists(*configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings)); + new_key = key.substr(0, pos) + "." 
+ std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); + ++sequence_number; + } while (S3::objectExists( + *configuration.client, configuration.url.bucket, new_key, configuration.url.version_id, configuration.request_settings)); - return new_key; - } - - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - configuration.url.bucket, key); + return new_key; } + + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. " + "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", + configuration.url.bucket, key); +} } @@ -1055,7 +1220,8 @@ public: const StorageS3::Configuration & configuration_, const String & bucket_, const String & key_) - : PartitionedSink(partition_by, context_, sample_block_), WithContext(context_) + : PartitionedSink(partition_by, context_, sample_block_) + , WithContext(context_) , format(format_) , sample_block(sample_block_) , compression_method(compression_method_) @@ -1074,19 +1240,11 @@ public: auto partition_key = replaceWildcards(key, partition_id); validateKey(partition_key); - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(getContext(), configuration, partition_key, /* sequence_number */1)) + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(getContext(), configuration, partition_key, /* sequence_number */ 1)) partition_key = *new_key; return std::make_shared( - format, - sample_block, - getContext(), - format_settings, - compression_method, - configuration, - partition_bucket, - partition_key - ); + format, sample_block, getContext(), format_settings, compression_method, configuration, partition_bucket, partition_key); } private: @@ -1167,7 +1325,8 @@ StorageS3::StorageS3( /// We don't allow special columns in S3 storage. 
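Editor's note: checkAndGetNewFileOnInsertIfNeeded() above generates candidate keys by splicing a sequence number in at the first dot of the key and retrying until a non-existing key is found. An isolated version of just that naming step (note it uses the first dot, so "data.tar.gz" becomes "data.1.tar.gz"):

```cpp
#include <cassert>
#include <string>

/// Illustrative version of the key naming used when
/// s3_create_new_file_on_insert is enabled:
/// "data.csv" -> "data.1.csv", "data.2.csv", ...
std::string makeNumberedKey(const std::string & key, size_t sequence_number)
{
    auto pos = key.find_first_of('.');
    return key.substr(0, pos) + "." + std::to_string(sequence_number)
        + (pos == std::string::npos ? "" : key.substr(pos));
}

int main()
{
    assert(makeNumberedKey("data.csv", 1) == "data.1.csv");
    assert(makeNumberedKey("data.tar.gz", 1) == "data.1.tar.gz");
    assert(makeNumberedKey("data", 2) == "data.2");  /// no extension: append
}
```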
if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); storage_metadata.setColumns(columns_); } @@ -1178,57 +1337,91 @@ StorageS3::StorageS3( } static std::shared_ptr createFileIterator( - const StorageS3::Configuration & configuration, + StorageS3::Configuration configuration, bool distributed_processing, ContextPtr local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, - StorageS3::KeysWithInfo * read_keys = nullptr, + StorageS3Source::KeysWithInfo * read_keys = nullptr, std::function file_progress_callback = {}) { if (distributed_processing) { - return std::make_shared(local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - return std::make_shared( - *configuration.client, configuration.url, predicate, virtual_columns, - local_context, read_keys, configuration.request_settings, file_progress_callback); + return std::make_shared( + local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); } else { - Strings keys = configuration.keys; - auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - if (filter_dag) + auto basic_iterator = [&]() -> std::unique_ptr { - std::vector paths; - paths.reserve(keys.size()); - for (const auto & key : keys) - paths.push_back(fs::path(configuration.url.bucket) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); + StorageS3Source::KeysWithInfo * local_read_keys = configuration.url.archive_pattern.has_value() ? 
nullptr : read_keys; + if (configuration.withGlobs()) + { + /// Iterate through disclosed globs and make a source for each file + return std::make_unique( + *configuration.client, + configuration.url, + predicate, + virtual_columns, + local_context, + local_read_keys, + configuration.request_settings, + file_progress_callback); + } + else + { + Strings keys = configuration.keys; + auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + if (filter_dag) + { + std::vector paths; + paths.reserve(keys.size()); + for (const auto & key : keys) + paths.push_back(fs::path(configuration.url.bucket) / key); + VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); + } + return std::make_unique( + *configuration.client, + configuration.url.version_id, + keys, + configuration.url.bucket, + configuration.request_settings, + local_read_keys, + file_progress_callback); + } + }(); + if (configuration.url.archive_pattern.has_value()) + { + return std::make_shared( + std::move(basic_iterator), + configuration.url.archive_pattern.value(), + configuration.client, + configuration.url.bucket, + configuration.url.version_id, + configuration.request_settings, + local_context, + read_keys); + } + else + { + return basic_iterator; } - - return std::make_shared( - *configuration.client, configuration.url.version_id, keys, - configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback); } } bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getFormatCopy(), context, format_settings); } bool StorageS3::prefersLargeBlocks() const { - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); + return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(getFormatCopy()); } bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const { - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); + return FormatFactory::instance().checkParallelizeOutputAfterReading(getFormatCopy(), context); } void StorageS3::read( @@ -1241,6 +1434,7 @@ void StorageS3::read( size_t max_block_size, size_t num_streams) { + updateConfiguration(local_context); auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) @@ -1267,7 +1461,6 @@ void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); - createIterator(predicate); } @@ -1277,17 +1470,21 @@ void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) return; iterator_wrapper = createFileIterator( - query_configuration, storage.distributed_processing, context, predicate, - virtual_columns, nullptr, context->getFileProgressCallback()); + storage.getConfigurationCopy(), + storage.distributed_processing, + context, + predicate, + virtual_columns, + nullptr, + context->getFileProgressCallback()); } void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - if 
(storage.partition_by && query_configuration.withWildcard()) + if (storage.partition_by && query_configuration.withPartitionWildcard()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); createIterator(nullptr); - size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); if (estimated_keys_count > 1) num_streams = std::min(num_streams, estimated_keys_count); @@ -1297,9 +1494,8 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, num_streams = 1; } - const auto & settings = context->getSettingsRef(); - const size_t max_parsing_threads = num_streams >= settings.max_parsing_threads ? 1 : (settings.max_parsing_threads / std::max(num_streams, 1ul)); - LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); + const size_t max_threads = context->getSettingsRef().max_threads; + const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); Pipes pipes; pipes.reserve(num_streams); @@ -1336,17 +1532,22 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, pipeline.init(std::move(pipe)); } -SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) +SinkToStoragePtr StorageS3::write( + const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) { auto query_configuration = updateConfigurationAndGetCopy(local_context); auto key = query_configuration.keys.front(); + if (query_configuration.withGlobsIgnorePartitionWildcard()) + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); + auto sample_block = metadata_snapshot->getSampleBlock(); auto chosen_compression_method = chooseCompressionMethod(query_configuration.keys.back(), query_configuration.compression_method); auto insert_query = std::dynamic_pointer_cast(query); auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && query_configuration.withWildcard(); + bool is_partitioned_implementation = partition_by_ast && query_configuration.withPartitionWildcard(); if (is_partitioned_implementation) { @@ -1363,12 +1564,9 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr } else { - if (query_configuration.withGlobs()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); - - if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, configuration, query_configuration.keys.front(), query_configuration.keys.size())) + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(local_context, query_configuration, query_configuration.keys.front(), query_configuration.keys.size())) { + std::lock_guard lock{configuration_update_mutex}; query_configuration.keys.push_back(*new_key); configuration.keys.push_back(*new_key); key = *new_key; @@ -1417,10 +1615,9 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, const auto * response_error = response.IsSuccess() ? 
nullptr : &response.GetError(); auto time_now = std::chrono::system_clock::now(); if (auto blob_storage_log = BlobStorageLogWriter::create()) - { for (const auto & key : query_configuration.keys) - blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); - } + blob_storage_log->addEvent( + BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); if (!response.IsSuccess()) { @@ -1445,18 +1642,24 @@ void StorageS3::updateConfiguration(const ContextPtr & local_context) configuration.update(local_context); } -void StorageS3::useConfiguration(const Configuration & new_configuration) +void StorageS3::useConfiguration(const StorageS3::Configuration & new_configuration) { std::lock_guard lock(configuration_update_mutex); configuration = new_configuration; } -const StorageS3::Configuration & StorageS3::getConfiguration() +StorageS3::Configuration StorageS3::getConfigurationCopy() const { std::lock_guard lock(configuration_update_mutex); return configuration; } +String StorageS3::getFormatCopy() const +{ + std::lock_guard lock(configuration_update_mutex); + return configuration.format; +} + bool StorageS3::Configuration::update(const ContextPtr & context) { auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName()); @@ -1511,7 +1714,8 @@ void StorageS3::Configuration::connect(const ContextPtr & context) .is_s3express_bucket = S3::isS3ExpressEndpoint(url.endpoint), }; - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); + auto credentials + = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); client = S3::ClientFactory::instance().create( client_configuration, client_settings, @@ -1530,6 +1734,14 @@ void StorageS3::Configuration::connect(const ContextPtr & context) credentials.GetSessionToken()); } +bool StorageS3::Configuration::withGlobsIgnorePartitionWildcard() const +{ + if (!withPartitionWildcard()) + return withGlobs(); + + return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; +} + void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); @@ -1544,10 +1756,12 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); - configuration.auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); + configuration.auth_settings.expiration_window_seconds + = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + configuration.compression_method + = collection.getOrDefault("compression_method", 
collection.getOrDefault("compression", "auto")); configuration.structure = collection.getOrDefault("structure", "auto"); configuration.request_settings = S3Settings::RequestSettings(collection); @@ -1583,8 +1797,8 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C if (count == 0 || count > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage S3 requires 1 to 5 arguments: " - "url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]"); + "Storage S3 requires 1 to 6 positional arguments: " + "url, [NOSIGN | access_key_id, secret_access_key], [session_token], [name of used format], [compression_method], [headers], [extra_credentials]"); std::unordered_map engine_args_to_idx; bool no_sign_request = false; @@ -1636,13 +1850,9 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - } else - { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } } } /// For 5 arguments we support 2 possible variants: @@ -1652,17 +1862,14 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) - { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; - } else - { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } } else if (count == 6) { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; + engine_args_to_idx + = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; } /// This argument is always the first @@ -1672,80 +1879,140 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C configuration.format = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["format"]], "format"); if (engine_args_to_idx.contains("compression_method")) - configuration.compression_method = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); + configuration.compression_method + = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); if (engine_args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); + configuration.auth_settings.access_key_id + = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); if (engine_args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); + configuration.auth_settings.secret_access_key + = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); if (engine_args_to_idx.contains("session_token")) - configuration.auth_settings.session_token = 
checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); + configuration.auth_settings.session_token + = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); if (no_sign_request) configuration.auth_settings.no_sign_request = no_sign_request; } - configuration.static_configuration = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); + configuration.static_configuration + = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); configuration.keys = {configuration.url.key}; if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.url.key).value_or("auto"); + { + if (configuration.url.archive_pattern.has_value()) + { + configuration.format = FormatFactory::instance() + .tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath()) + .value_or("auto"); + } + else + { + configuration.format + = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto"); + } + } return configuration; } ColumnsDescription StorageS3::getTableStructureFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) + const StorageS3::Configuration & configuration_, const std::optional & format_settings_, const ContextPtr & ctx) { - return getTableStructureAndFormatFromDataImpl(configuration.format, configuration, format_settings, ctx).first; + return getTableStructureAndFormatFromDataImpl(configuration_.format, configuration_, format_settings_, ctx).first; } std::pair StorageS3::getTableStructureAndFormatFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx) + const StorageS3::Configuration & configuration, const std::optional & format_settings, const ContextPtr & ctx) { return getTableStructureAndFormatFromDataImpl(std::nullopt, configuration, format_settings, ctx); } -namespace +class ReadBufferIterator : public IReadBufferIterator, WithContext { - class ReadBufferIterator : public IReadBufferIterator, WithContext +public: + ReadBufferIterator( + std::shared_ptr file_iterator_, + const StorageS3Source::KeysWithInfo & read_keys_, + const StorageS3::Configuration & configuration_, + std::optional format_, + const std::optional & format_settings_, + ContextPtr context_) + : WithContext(context_) + , file_iterator(file_iterator_) + , read_keys(read_keys_) + , configuration(configuration_) + , format(std::move(format_)) + , format_settings(format_settings_) + , prev_read_keys_size(read_keys_.size()) { - public: - ReadBufferIterator( - std::shared_ptr file_iterator_, - const StorageS3Source::KeysWithInfo & read_keys_, - const StorageS3::Configuration & configuration_, - std::optional format_, - const std::optional & format_settings_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , read_keys(read_keys_) - , configuration(configuration_) - , format(std::move(format_)) - , format_settings(format_settings_) - , prev_read_keys_size(read_keys_.size()) + } + + Data next() override + { + if (first) { + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. 
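+            /// For a key inside an archive (e.g. `archive.zip::data.csv`), formatInferenceName()
+            /// returns only the path within the archive, so the format is inferred from `data.csv`
+            /// rather than from the archive's own name.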
+ if (!format) + { + for (const auto & key_with_info : read_keys) + { + if (auto format_from_file_name + = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->formatInferenceName())) + { + format = format_from_file_name; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns, format}; + } } - Data next() override + while (true) { - if (first) + current_key_with_info = (*file_iterator)(); + + if (!current_key_with_info || current_key_with_info->key.empty()) { - /// If format is unknown we iterate through all currently read keys on first iteration and - /// try to determine format by file name. + if (first) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "in S3 or all files are empty. You can specify table structure manually", + *format); + + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "in S3 or all files are empty. You can specify the format manually"); + } + + return {nullptr, std::nullopt, format}; + } + + if (read_keys.size() > prev_read_keys_size) + { + /// If format is unknown we can try to determine it by new file names. if (!format) { - for (const auto & key_with_info : read_keys) + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->key)) + if (auto format_from_file_name + = FormatFactory::instance().tryGetFormatFromFileName((*it)->formatInferenceName())) { format = format_from_file_name; break; @@ -1753,228 +2020,209 @@ namespace } } - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + /// Check new files in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns, format}; + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; } + + prev_read_keys_size = read_keys.size(); } - while (true) + if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) + continue; + + /// In union mode, check cached columns only for current key. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) { - current_key_with_info = (*file_iterator)(); - - if (!current_key_with_info || current_key_with_info->key.empty()) - { - if (first) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in S3 or all files are empty. 
You can specify table structure manually", - *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in S3 or all files are empty. You can specify the format manually"); - } - - return {nullptr, std::nullopt, format}; - } - - /// S3 file iterator could get new keys after new iteration - if (read_keys.size() > prev_read_keys_size) - { - /// If format is unknown we can try to determine it by new file names. - if (!format) - { - for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) - { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->key)) - { - format = format_from_file_name; - break; - } - } - } - - /// Check new files in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - if (columns_from_cache) - return {nullptr, columns_from_cache, format}; - } - - prev_read_keys_size = read_keys.size(); - } - - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) - continue; - - /// In union mode, check cached columns only for current key. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - StorageS3::KeysWithInfo keys = {current_key_with_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) - { - first = false; - return {nullptr, columns_from_cache, format}; - } - } - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) + StorageS3Source::KeysWithInfo keys = {current_key_with_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) { first = false; - return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt, format}; + return {nullptr, columns_from_cache, format}; } } - } - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); - 
StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_key_with_info) - return current_key_with_info->key; - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_key_with_info); int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max); - } + std::unique_ptr impl; - private: - std::optional tryGetColumnsFromCache( - const StorageS3::KeysWithInfo::const_iterator & begin, - const StorageS3::KeysWithInfo::const_iterator & end) - { - auto context = getContext(); - if (!context->getSettingsRef().schema_inference_use_cache_for_s3) - return std::nullopt; - - auto & schema_cache = StorageS3::getSchemaCache(context); - for (auto it = begin; it < end; ++it) + if (!current_key_with_info->path_in_archive.has_value()) { - auto get_last_mod_time = [&] + impl = std::make_unique( + configuration.client, + configuration.url.bucket, + current_key_with_info->key, + configuration.url.version_id, + configuration.request_settings, + getContext()->getReadSettings()); + } + else + { + assert(current_key_with_info->archive_reader); + impl = current_key_with_info->archive_reader->readFile( + current_key_with_info->path_in_archive.value(), /*throw_on_not_found=*/true); + } + if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) + { + first = false; + // We do not need to use any data decompression algorithm if we take data from an archive because it will be decompressed automatically. + return { + wrapReadBufferWithCompressionMethod( + std::move(impl), + current_key_with_info->path_in_archive.has_value() + ? 
chooseCompressionMethod(current_key_with_info->path_in_archive.value(), configuration.compression_method) + : chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), + zstd_window_log_max), + std::nullopt, + format}; + } + } + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) + return; + + String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) + / configuration.url.bucket / current_key_with_info->getPath(); + auto key = getKeyForSchemaCache(source, *format, format_settings, getContext()); + StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); + } + + void setSchemaToLastFile(const ColumnsDescription & columns) override + { + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 + || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) + return; + + String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) + / configuration.url.bucket / current_key_with_info->getPath(); + auto cache_key = getKeyForSchemaCache(source, *format, format_settings, getContext()); + StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); + } + + void setResultingSchema(const ColumnsDescription & columns) override + { + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 + || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) + return; + + auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; + Strings sources; + sources.reserve(read_keys.size()); + std::transform( + read_keys.begin(), + read_keys.end(), + std::back_inserter(sources), + [&](const auto & elem) { return host_and_bucket / elem->getPath(); }); + auto cache_keys = getKeysForSchemaCache(sources, *format, format_settings, getContext()); + StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); + } + + void setFormatName(const String & format_name) override + { + format = format_name; + } + + String getLastFileName() const override + { + if (current_key_with_info) + return current_key_with_info->getPath(); + return ""; + } + + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override + { + chassert(current_key_with_info); + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); + return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max); + } + +private: + std::optional tryGetColumnsFromCache( + const StorageS3Source::KeysWithInfo::const_iterator & begin, const StorageS3Source::KeysWithInfo::const_iterator & end) + { + auto context = getContext(); + if (!context->getSettingsRef().schema_inference_use_cache_for_s3) + return std::nullopt; + + auto & schema_cache = StorageS3::getSchemaCache(context); + for (auto it = begin; it < end; ++it) + { + auto get_last_mod_time = [&] + { + time_t last_modification_time = 0; + if ((*it)->info) { - time_t last_modification_time = 0; - if ((*it)->info) - { - 
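                    /// The listing that produced this key already carried the object's metadata,
                    /// so its modification time is reused here instead of issuing another request.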
last_modification_time = (*it)->info->last_modification_time;
                }
                else
                {
                    /// Note that in case of exception in getObjectInfo returned info will be empty,
                    /// but schema cache will handle this case and won't return columns from cache
                    /// because we can't say that it's valid without last modification time.
                    last_modification_time = S3::getObjectInfo(
                        *configuration.client,
                        configuration.url.bucket,
                        (*it)->key,
                        configuration.url.version_id,
                        configuration.request_settings,
                        /*with_metadata=*/ false,
                        /*throw_on_error= */ false).last_modification_time;
                }

                return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt;
            };
            String path = fs::path(configuration.url.bucket) / (*it)->getPath();

            String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path;

            if (format)
            {
                auto cache_key = getKeyForSchemaCache(source, *format, format_settings, context);
                if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time))
                    return columns;
            }
            else
            {
                /// If format is unknown, we can iterate through all possible input formats
                /// and check if we have an entry with this format and this file in schema cache.
                /// If we have such entry for some format, we can use this format to read the file.
                for (const auto & format_name : FormatFactory::instance().getAllInputFormats())
                {
                    auto cache_key = getKeyForSchemaCache(source, format_name, format_settings, context);
                    if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time))
                    {
                        /// Now format is known. It should be the same for all files. 
+ format = format_name; + return columns; } } } - - return std::nullopt; } - std::shared_ptr file_iterator; - const StorageS3Source::KeysWithInfo & read_keys; - const StorageS3::Configuration & configuration; - std::optional format; - const std::optional & format_settings; - StorageS3Source::KeyWithInfoPtr current_key_with_info; - size_t prev_read_keys_size; - bool first = true; - }; + return std::nullopt; + } -} + std::shared_ptr file_iterator; + const StorageS3Source::KeysWithInfo & read_keys; + const StorageS3::Configuration & configuration; + std::optional format; + const std::optional & format_settings; + StorageS3Source::KeyWithInfoPtr current_key_with_info; + size_t prev_read_keys_size; + bool first = true; +}; std::pair StorageS3::getTableStructureAndFormatFromDataImpl( std::optional format, - const Configuration & configuration, + const StorageS3::Configuration & configuration, const std::optional & format_settings, const ContextPtr & ctx) { @@ -2071,7 +2319,6 @@ SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx) static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS)); return schema_cache; } - } #endif diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index c8ab28fb20e..7711483b186 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -1,7 +1,10 @@ #pragma once -#include "config.h" - +#include +#include +#include "IO/Archives/IArchiveReader.h" +#include "IO/Archives/createArchiveReader.h" +#include "IO/ReadBuffer.h" #if USE_AWS_S3 #include @@ -23,36 +26,52 @@ #include #include -#include - -namespace fs = std::filesystem; - namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + class PullingPipelineExecutor; class NamedCollection; class StorageS3Source : public SourceWithKeyCondition, WithContext { public: - struct KeyWithInfo { KeyWithInfo() = default; - explicit KeyWithInfo(String key_, std::optional info_ = std::nullopt) - : key(std::move(key_)), info(std::move(info_)) {} + explicit KeyWithInfo( + String key_, + std::optional info_ = std::nullopt, + std::optional path_in_archive_ = std::nullopt, + std::shared_ptr archive_reader_ = nullptr) + : key(std::move(key_)) + , info(std::move(info_)) + , path_in_archive(std::move(path_in_archive_)) + , archive_reader(std::move(archive_reader_)) + { + if (path_in_archive.has_value() != (archive_reader != nullptr)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Archive reader and path in archive must exist simultaneously"); + } virtual ~KeyWithInfo() = default; String key; std::optional info; + std::optional path_in_archive; + std::shared_ptr archive_reader; + + String getPath() { return path_in_archive.has_value() ? (key + "::" + path_in_archive.value()) : key; } + String formatInferenceName() { return path_in_archive.has_value() ? path_in_archive.value() : key; } }; + using KeyWithInfoPtr = std::shared_ptr; using KeysWithInfo = std::vector; - class IIterator { public: @@ -65,7 +84,7 @@ public: /// fixme: May underestimate if the glob has a strong filter, so there are few matches among the first 1000 ListObjects results. 
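        /// (e.g. a glob such as `prefix/*/2024-01-01.csv` may match only a few of the first
        /// 1000 listed objects even though the complete listing would yield many more matches)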
virtual size_t estimatedKeysCount() = 0; - KeyWithInfoPtr operator ()() { return next(); } + KeyWithInfoPtr operator()() { return next(); } }; class DisclosedGlobIterator : public IIterator @@ -126,6 +145,41 @@ public: ReadTaskCallback callback; }; + class ArchiveIterator : public IIterator, public WithContext + { + public: + explicit ArchiveIterator( + std::unique_ptr basic_iterator_, + const std::string & archive_pattern_, + std::shared_ptr client_, + const String & bucket_, + const String & version_id_, + const S3Settings::RequestSettings & request_settings, + ContextPtr context_, + KeysWithInfo * read_keys_); + + KeyWithInfoPtr next(size_t) override; /// NOLINT + size_t estimatedKeysCount() override; + void refreshArchiveReader(); + + private: + std::unique_ptr basic_iterator; + KeyWithInfoPtr basic_key_with_info_ptr; + std::unique_ptr basic_read_buffer; + std::shared_ptr archive_reader{nullptr}; + std::unique_ptr file_enumerator = nullptr; + std::string path_in_archive = {}; // used when reading a single file from archive + IArchiveReader::NameFilter filter = {}; // used when files inside archive are defined with a glob + std::shared_ptr client; + const String bucket; + const String version_id; + S3Settings::RequestSettings request_settings; + std::mutex take_next_mutex; + KeysWithInfo * read_keys; + }; + + friend StorageS3Source::ArchiveIterator; + StorageS3Source( const ReadFromFormatInfo & info, const String & format, @@ -194,10 +248,7 @@ private: ReaderHolder(const ReaderHolder & other) = delete; ReaderHolder & operator=(const ReaderHolder & other) = delete; - ReaderHolder(ReaderHolder && other) noexcept - { - *this = std::move(other); - } + ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); } ReaderHolder & operator=(ReaderHolder && other) noexcept { @@ -215,8 +266,22 @@ private: explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - String getPath() const { return fs::path(bucket) / key_with_info->key; } - const String & getFile() const { return key_with_info->key; } + String getPath() const + { + return key_with_info->path_in_archive.has_value() + ? (bucket + "/" + key_with_info->key + "::" + key_with_info->path_in_archive.value()) + : (bucket + "/" + key_with_info->key); + } + const String & getFile() const + { + return key_with_info->path_in_archive.has_value() ? key_with_info->path_in_archive.value() : key_with_info->key; + } + String getFileExtended() const + { + return key_with_info->path_in_archive.has_value() ? (String{key_with_info->key} + "::" + key_with_info->path_in_archive.value()) + : key_with_info->key; + } + bool isArchive() { return key_with_info->path_in_archive.has_value(); } const KeyWithInfo & getKeyWithInfo() const { return *key_with_info; } std::optional getFileSize() const { return key_with_info->info ? 
std::optional(key_with_info->info->size) : std::nullopt; } @@ -255,9 +320,6 @@ private: ReaderHolder createReader(size_t idx = 0); std::future createReaderAsync(size_t idx = 0); - std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); - std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); - void addNumRowsToCache(const String & key, size_t num_rows); std::optional tryGetNumRowsFromCache(const KeyWithInfo & key_with_info); }; @@ -274,7 +336,7 @@ public: { Configuration() = default; - String getPath() const { return url.key; } + const String & getPath() const { return url.key; } bool update(const ContextPtr & context); @@ -282,13 +344,14 @@ public: bool withGlobs() const { return url.key.find_first_of("*?{") != std::string::npos; } - bool withWildcard() const + bool withPartitionWildcard() const { static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return url.bucket.find(PARTITION_ID_WILDCARD) != String::npos - || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; + return url.bucket.find(PARTITION_ID_WILDCARD) != String::npos || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; } + bool withGlobsIgnorePartitionWildcard() const; + S3::URI url; S3::AuthSettings auth_settings; S3Settings::RequestSettings request_settings; @@ -313,10 +376,7 @@ public: bool distributed_processing_ = false, ASTPtr partition_by_ = nullptr); - String getName() const override - { - return name; - } + String getName() const override { return name; } void read( QueryPlan & query_plan, @@ -328,27 +388,25 @@ public: size_t max_block_size, size_t num_streams) override; - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; + SinkToStoragePtr + write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; + void truncate( + const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; bool supportsPartitionBy() const override; - static void processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection); + static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); static SchemaCache & getSchemaCache(const ContextPtr & ctx); - static StorageS3::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file = true); + static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file = true); static ColumnsDescription getTableStructureFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); + const Configuration & configuration_, const std::optional & format_settings_, const ContextPtr & ctx); static std::pair getTableStructureAndFormatFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - const ContextPtr & ctx); + const Configuration & configuration, const std::optional & format_settings, const ContextPtr & ctx); using KeysWithInfo = StorageS3Source::KeysWithInfo; @@ -361,7 +419,9 @@ protected: void useConfiguration(const Configuration & 
new_configuration); - const Configuration & getConfiguration(); + Configuration getConfigurationCopy() const; + + String getFormatCopy() const; private: friend class StorageS3Cluster; @@ -370,7 +430,7 @@ private: friend class ReadFromStorageS3Step; Configuration configuration; - std::mutex configuration_update_mutex; + mutable std::mutex configuration_update_mutex; String name; const bool distributed_processing; @@ -392,6 +452,24 @@ private: bool parallelizeOutputAfterReading(ContextPtr context) const override; }; +std::unique_ptr createS3ReadBuffer( + const String & key, + size_t object_size, + std::shared_ptr context, + std::shared_ptr client_ptr, + const String & bucket, + const String & version_id, + const S3Settings::RequestSettings & request_settings); + +std::unique_ptr createAsyncS3ReadBuffer( + const String & key, + const ReadSettings & read_settings, + size_t object_size, + std::shared_ptr context, + std::shared_ptr client_ptr, + const String & bucket, + const String & version_id, + const S3Settings::RequestSettings & request_settings); } #endif diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 6b22771b38f..0060450eea7 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -91,7 +91,14 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const { auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, predicate, getVirtualsList(), context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); + *s3_configuration.client, + s3_configuration.url, + predicate, + getVirtualsList(), + context, + nullptr, + s3_configuration.request_settings, + context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index a8c100ebd44..dfb427a3bba 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -216,7 +216,19 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context configuration.auth_settings.no_sign_request = no_sign_request; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(url).getPath()).value_or("auto"); + { + if (configuration.url.archive_pattern.has_value()) + { + configuration.format = FormatFactory::instance() + .tryGetFormatFromFileName(Poco::URI(configuration.url.archive_pattern.value()).getPath()) + .value_or("auto"); + } + else + { + configuration.format + = FormatFactory::instance().tryGetFormatFromFileName(Poco::URI(configuration.url.uri_str).getPath()).value_or("auto"); + } + } } configuration.keys = {configuration.url.key}; diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.reference b/tests/queries/0_stateless/03036_reading_s3_archives.reference new file mode 100644 index 00000000000..36ced212a1b --- /dev/null +++ b/tests/queries/0_stateless/03036_reading_s3_archives.reference @@ -0,0 +1,52 @@ +1 Str1 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 
example3.csv test/03036_archive2.zip::example3.csv +3 Str3 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 example2.csv test/03036_archive2.zip::example2.csv +1 Str1 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 example3.csv test/03036_archive2.zip::example3.csv +1 Str1 example1.csv test/03036_archive1.tar::example1.csv +2 Str2 example1.csv test/03036_archive1.tar::example1.csv +7 Str7 example4.csv test/03036_archive1.tar::example4.csv +7 Str7 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 example4.csv test/03036_archive1.tar::example4.csv +8 Str8 example4.csv test/03036_archive2.tar::example4.csv +5 Str5 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv +11 Str11 example6.csv test/03036_archive3.tar.gz::example6.csv +12 Str12 example6.csv test/03036_archive3.tar.gz::example6.csv +3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 example3.csv test/03036_archive2.tar::example3.csv +3 Str3 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 example3.csv test/03036_archive2.tar::example3.csv +13 Str13 example7.csv test/03036_compressed_file_archive.zip::example7.csv +14 Str14 example7.csv test/03036_compressed_file_archive.zip::example7.csv diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql new file mode 100644 index 00000000000..98ca0425174 --- /dev/null +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -0,0 +1,22 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); +SELECT id, data, 
_file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); +select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +CREATE TABLE table_zip22 Engine S3(s3_conn, filename='03036_archive2.zip :: example2.csv'); +select id, data, _file, _path from table_zip22 ORDER BY (id, _file, _path); +CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: example*.csv'); +SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); +CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); +SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); +CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError INCORRECT_QUERY } +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) \ No newline at end of file diff --git a/tests/queries/0_stateless/data_minio/03036_archive1.tar b/tests/queries/0_stateless/data_minio/03036_archive1.tar new file mode 100644 index 0000000000000000000000000000000000000000..55b3ddc745a2af5a59229a0ecabaee0e52d6e7dd GIT binary patch literal 10240 zcmeIx(F(#K7=~ewcNJsDBYrnGyN|A5wH*W%v1xSqEhEUI24mFV^+e@Y(MP`S-kh(e zPHzswO)b=9Y;|qa*lAnIXa6jTlv-(1ixH_zIo5Syi%O-k(0!s~{otBDO_ksKi}O(_ zntZEAi`O$bwt3sQCeeAebG^=zA`FUbJx}1KGX%O<{>{JSzp^&u-x%^g<69?hedphX z*xM}GhQXHnPbOp`0tg_000IagfB*srAbt30;YM)7 z)qw_U;4%Cf*zmANMvTTjAj27%|I>1_nkT-qHwSq56as;uADqAe)HkU}O_x d5hg;N4>S?vd<+v=*+8~40pSuLy$i%)000M4P!|9I literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_minio/03036_archive2.tar b/tests/queries/0_stateless/data_minio/03036_archive2.tar new file mode 100644 index 0000000000000000000000000000000000000000..4cc3f6830a57979e88d1ea5eeeb16e43a470f925 GIT binary patch literal 10240 zcmeIzOAdlC7)H^KSq16Xn7`16uEQ0eGB8nN6pYJPD;gO|f-jQWN!zqQIpw3}so3n+ zWj?q zHPq_78gES=PG_|oEsLs1+(^%=z0--04X=mu3aqq;KzYl*{hR!U5c6*>`R}o+a~{<|6lp8?)jg_*iUt0B({w32l&bc@DKzL mKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0;rO#)|UT~~1c literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_minio/03036_archive2.zip b/tests/queries/0_stateless/data_minio/03036_archive2.zip new file mode 100644 index 0000000000000000000000000000000000000000..8b49dc8d9f4d529f79eb0faf9d99acf401a6d4d1 GIT binary patch literal 372 zcmWIWW@Zs#U|`^2;4Ep4NGZ6oT^z`h1Y#Zr8HUt~#N2|MRAas5;D`hbCf5s0@mf>@|NVTJev%@@cfVmcVv e#8`xhQ0D_p1UVnWL{>JC?My(p1W4}!aTow}oKKYi literal 0 HcmV?d00001 diff --git 
a/tests/queries/0_stateless/data_minio/03036_archive3.tar.gz b/tests/queries/0_stateless/data_minio/03036_archive3.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..888717640710248dc8668d69a15ae0c6864832c7 GIT binary patch literal 185 zcmb2|=3oE==C@aT`I-y_93HN-WKHxv;}d<#(Z*3!Of8GY{@CMg0pYM+OeU50mrdT% zeRI#X{)(M*vv*r>E8i?2+43NM`LxzE=R1QJ+j%6$O6c{h(A{giQfKRhnEPcLJ%caH z|H{5zU-A9vKmWO__H@Pi#LRoU`&eqHJp26Iy%J%8{(swZf9oInx7Q=EtLN`-hyU3X h-`87}t)Hcuwfy2Ic8K#Jx)000@hQpo@S literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_minio/03036_compressed_file_archive.zip b/tests/queries/0_stateless/data_minio/03036_compressed_file_archive.zip new file mode 100644 index 0000000000000000000000000000000000000000..619f81327a8b321b4366cb5906ba871d94271136 GIT binary patch literal 231 zcmWIWW@h1H0D&-{g%L_Ntq(1MY!K#QkYPxzNX#wBNj29?E-niV;bdUm!ef*s#%q*T zTEWf0$nt`jfdNd&cXM!nq!^fybugUvx#;Ps Date: Tue, 30 Apr 2024 16:33:22 +0200 Subject: [PATCH 177/651] Make init-network.sh sourceable, tune runner's tailscale hostname --- tests/ci/worker/prepare-ci-ami.sh | 67 +++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index effc224c2d5..03de3309db2 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -155,31 +155,56 @@ apt-get install tailscale --yes --no-install-recommends # Create a common script for the instances mkdir /usr/local/share/scripts -p -cat > /usr/local/share/scripts/init-network.sh << 'EOF' -#!/usr/bin/env bash +setup_cloudflare_dns() { + # Add cloudflare DNS as a fallback + # Get default gateway interface + local IFACE ETH_DNS CLOUDFLARE_NS new_dns + IFACE=$(ip --json route list | jq '.[]|select(.dst == "default").dev' --raw-output) + # `Link 2 (eth0): 172.31.0.2` + ETH_DNS=$(resolvectl dns "$IFACE") || : + CLOUDFLARE_NS=1.1.1.1 + if [[ "$ETH_DNS" ]] && [[ "${ETH_DNS#*: }" != *"$CLOUDFLARE_NS"* ]]; then + # Cut the leading legend + ETH_DNS=${ETH_DNS#*: } + # shellcheck disable=SC2206 + new_dns=(${ETH_DNS} "$CLOUDFLARE_NS") + resolvectl dns "$IFACE" "${new_dns[@]}" + fi +} -# Add cloudflare DNS as a fallback -# Get default gateway interface -IFACE=$(ip --json route list | jq '.[]|select(.dst == "default").dev' --raw-output) -# `Link 2 (eth0): 172.31.0.2` -ETH_DNS=$(resolvectl dns "$IFACE") || : -CLOUDFLARE_NS=1.1.1.1 -if [[ "$ETH_DNS" ]] && [[ "${ETH_DNS#*: }" != *"$CLOUDFLARE_NS"* ]]; then - # Cut the leading legend - ETH_DNS=${ETH_DNS#*: } - # shellcheck disable=SC2206 - new_dns=(${ETH_DNS} "$CLOUDFLARE_NS") - resolvectl dns "$IFACE" "${new_dns[@]}" -fi +setup_tailscale() { + # Setup tailscale, the very first action + local TS_API_CLIENT_ID TS_API_CLIENT_SECRET TS_AUTHKEY RUNNER_TYPE + TS_API_CLIENT_ID=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-id --query 'Parameter.Value' --output text --with-decryption) + TS_API_CLIENT_SECRET=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-secret --query 'Parameter.Value' --output text --with-decryption) -# Setup tailscale, the very first action -TS_API_CLIENT_ID=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-id --query 'Parameter.Value' --output text --with-decryption) -TS_API_CLIENT_SECRET=$(aws ssm get-parameter --region us-east-1 --name /tailscale/api-client-secret --query 'Parameter.Value' --output text --with-decryption) -export TS_API_CLIENT_ID TS_API_CLIENT_SECRET -TS_AUTHKEY=$(get-authkey -tags 
tag:svc-core-ci-github -reusable -ephemeral) -tailscale up --ssh --auth-key="$TS_AUTHKEY" --hostname="ci-runner-$INSTANCE_ID" + RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) + RUNNER_TYPE=${RUNNER_TYPE:-unknown} + # Clean possible garbage from the runner type + RUNNER_TYPE=${RUNNER_TYPE//[^0-9a-z]/-} + TS_AUTHKEY=$(TS_API_CLIENT_ID="$TS_API_CLIENT_ID" TS_API_CLIENT_SECRET="$TS_API_CLIENT_SECRET" \ + get-authkey -tags tag:svc-core-ci-github -reusable -ephemeral) + tailscale up --ssh --auth-key="$TS_AUTHKEY" --hostname="ci-runner-$RUNNER_TYPE-$INSTANCE_ID" +} + +cat > /usr/local/share/scripts/init-network.sh << EOF +#!/usr/bin/env bash +$(declare -f setup_cloudflare_dns) + +$(declare -f setup_tailscale) + +# If the script is sourced, it will return now and won't execute functions +return 0 &>/dev/null || : + +echo Setup Cloudflare DNS +setup_cloudflare_dns + +echo Setup Tailscale VPN +setup_tailscale EOF + +chmod +x /usr/local/share/scripts/init-network.sh + # The following line is used in aws TOE check. touch /var/tmp/clickhouse-ci-ami.success From ee49e237daa0cfb5b2c1b726449d8c718da69119 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 30 Apr 2024 16:36:59 +0200 Subject: [PATCH 178/651] Clean github runner's temporary _work directory between runs --- tests/ci/worker/init_runner.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index de1d128dc87..b3c5be04df3 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -312,6 +312,8 @@ while true; do no_terminating_metadata || terminate_on_event check_spot_instance_is_old && terminate_and_exit check_proceed_spot_termination force + # There were some failures to start the Job because of trash in _work + rm -rf _work echo "Going to configure runner" sudo -u ubuntu ./config.sh --url $RUNNER_URL --token "$(get_runner_token)" \ From cbf02f69d5ee9cf33cbd3e055fcb9a1605f5c4cb Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 30 Apr 2024 16:59:29 +0200 Subject: [PATCH 179/651] Add a fallback for autoupdate if GH actions runner fails to start --- tests/ci/worker/init_runner.sh | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index b3c5be04df3..8613734a87e 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -300,11 +300,20 @@ list_children () { echo "$children" } +# https://github.com/actions/runner/issues/3266 +# We're unable to know if the runner is failed to start. +# There's possibility that it fails because the runner's version is outdated, +# so after the first failure we'll try to launch it with enabled autoupdate. +# +# We'll fail and terminate after 10 consequent failures. 
+ATTEMPT=0 + while true; do runner_pid=$(pgrep Runner.Listener) echo "Got runner pid '$runner_pid'" if [ -z "$runner_pid" ]; then + echo Attempt $((++ATTEMPT)) to start the runner cd $RUNNER_HOME || terminate_and_exit detect_delayed_termination # If runner is not active, check that it needs to terminate itself @@ -316,9 +325,21 @@ while true; do rm -rf _work echo "Going to configure runner" - sudo -u ubuntu ./config.sh --url $RUNNER_URL --token "$(get_runner_token)" \ - --ephemeral --disableupdate --unattended \ - --runnergroup Default --labels "$LABELS" --work _work --name "$INSTANCE_ID" + token_args=(--token "$(get_runner_token)") + config_args=( + "${token_args[@]}" --url "$RUNNER_URL" + --ephemeral --unattended --replace --runnergroup Default + --labels "$LABELS" --work _work --name "$INSTANCE_ID" + ) + if (( ATTEMPT > 1 )); then + echo 'The runner failed to start at least once. Removing it and then configuring with autoudate enabled.' + sudo -u ubuntu ./config.sh remove "${token_args[@]}" + sudo -u ubuntu ./config.sh "${config_args[@]}" + else + echo "Configure runner with disable autoupdate" + config_args+=("--disableupdate") + sudo -u ubuntu ./config.sh "${config_args[@]}" + fi echo "Another one check to avoid race between runner and infrastructure" no_terminating_metadata || terminate_on_event @@ -331,7 +352,11 @@ while true; do ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \ ./run.sh & sleep 10 + elif (( ATTEMPT > 10 )); then + echo "The runner has failed to start after $ATTEMPT attempt. Give up and terminate it" + terminate_and_exit else + ATTEMPT=0 echo "Runner is working with pid $runner_pid, checking the metadata in background" check_proceed_spot_termination From 599c512e0be65a630df2c6fd2e5a7769085d8372 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 10 May 2024 19:36:20 +0200 Subject: [PATCH 180/651] Patch runner helpers and check the exit code for attempts --- tests/ci/worker/init_runner.sh | 73 +++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 8613734a87e..4c6a63563ed 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -300,20 +300,44 @@ list_children () { echo "$children" } -# https://github.com/actions/runner/issues/3266 -# We're unable to know if the runner is failed to start. # There's possibility that it fails because the runner's version is outdated, # so after the first failure we'll try to launch it with enabled autoupdate. # # We'll fail and terminate after 10 consequent failures. ATTEMPT=0 +# In `kill` 0 means "all processes in process group", -1 is "all but PID 1" +# We use `-2` to get an error +RUNNER_PID=-2 while true; do - runner_pid=$(pgrep Runner.Listener) - echo "Got runner pid '$runner_pid'" + # Does not send signal, but checks that the process $RUNNER_PID is running + if kill -0 -- $RUNNER_PID; then + ATTEMPT=0 + echo "Runner is working with pid $RUNNER_PID, checking the metadata in background" + check_proceed_spot_termination + + if ! 
is_job_assigned; then + RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$RUNNER_PID" 2>/dev/null || date +%s) )) + echo "The runner is launched $RUNNER_AGE seconds ago and still has hot received the job" + if (( 60 < RUNNER_AGE )); then + echo "Attempt to delete the runner for a graceful shutdown" + sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ + || continue + echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" + terminate_and_exit + fi + fi + else + if [ "$RUNNER_PID" != "-2" ]; then + wait $RUNNER_PID \ + && echo "Runner with PID $RUNNER_PID successfully finished" \ + || echo "Attempt $((++ATTEMPT)) to start the runner" + fi + if (( ATTEMPT > 10 )); then + echo "The runner has failed to start after $ATTEMPT attempt. Give up and terminate it" + terminate_and_exit + fi - if [ -z "$runner_pid" ]; then - echo Attempt $((++ATTEMPT)) to start the runner cd $RUNNER_HOME || terminate_and_exit detect_delayed_termination # If runner is not active, check that it needs to terminate itself @@ -321,8 +345,6 @@ while true; do no_terminating_metadata || terminate_on_event check_spot_instance_is_old && terminate_and_exit check_proceed_spot_termination force - # There were some failures to start the Job because of trash in _work - rm -rf _work echo "Going to configure runner" token_args=(--token "$(get_runner_token)") @@ -346,32 +368,29 @@ while true; do check_spot_instance_is_old && terminate_and_exit check_proceed_spot_termination force + # There were some failures to start the Job because of trash in _work + rm -rf _work + + # https://github.com/actions/runner/issues/3266 + # We're unable to know if the runner is failed to start. + echo 'Monkey-patching run helpers to get genuine exit code of the runner' + for script in run.sh run-helper.sh.template; do + # shellcheck disable=SC2016 + grep -q 'exit 0$' "$script" && \ + sed 's/exit 0/exit $returnCode/' -i "$script" && \ + echo "Script $script is patched" + done + echo "Run" sudo -u ubuntu \ ACTIONS_RUNNER_HOOK_JOB_STARTED=/tmp/actions-hooks/pre-run.sh \ ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \ ./run.sh & - sleep 10 - elif (( ATTEMPT > 10 )); then - echo "The runner has failed to start after $ATTEMPT attempt. Give up and terminate it" - terminate_and_exit - else - ATTEMPT=0 - echo "Runner is working with pid $runner_pid, checking the metadata in background" - check_proceed_spot_termination + RUNNER_PID=$! - if ! is_job_assigned; then - RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$runner_pid" 2>/dev/null || date +%s) )) - echo "The runner is launched $RUNNER_AGE seconds ago and still has hot received the job" - if (( 60 < RUNNER_AGE )); then - echo "Attempt to delete the runner for a graceful shutdown" - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ - || continue - echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" - terminate_and_exit - fi - fi + sleep 10 fi + sleep 5 done From dab1e5002e5160080049a90f14d3394cc89acce7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 13 May 2024 12:47:47 +0200 Subject: [PATCH 181/651] Upgrade the runner version to the latest --- tests/ci/worker/prepare-ci-ami.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index 03de3309db2..86a8fae20ee 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -9,7 +9,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.315.0 +export RUNNER_VERSION=2.316.1 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { From baffd31423f5d64e47ef53a9232406fa91ba4060 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 13 May 2024 14:17:45 +0200 Subject: [PATCH 182/651] Fix typos --- tests/ci/worker/init_runner.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 4c6a63563ed..ec7793db2aa 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -318,7 +318,7 @@ while true; do if ! is_job_assigned; then RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$RUNNER_PID" 2>/dev/null || date +%s) )) - echo "The runner is launched $RUNNER_AGE seconds ago and still has hot received the job" + echo "The runner is launched $RUNNER_AGE seconds ago and still hasn't received a job" if (( 60 < RUNNER_AGE )); then echo "Attempt to delete the runner for a graceful shutdown" sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ @@ -354,11 +354,11 @@ while true; do --labels "$LABELS" --work _work --name "$INSTANCE_ID" ) if (( ATTEMPT > 1 )); then - echo 'The runner failed to start at least once. Removing it and then configuring with autoudate enabled.' + echo 'The runner failed to start at least once. Removing it and then configuring with autoupdate enabled.' sudo -u ubuntu ./config.sh remove "${token_args[@]}" sudo -u ubuntu ./config.sh "${config_args[@]}" else - echo "Configure runner with disable autoupdate" + echo "Configure runner with disabled autoupdate" config_args+=("--disableupdate") sudo -u ubuntu ./config.sh "${config_args[@]}" fi From dc8f8a9a1864be4abbbd8c4e8ea856664f079c4d Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 13 May 2024 12:37:03 +0000 Subject: [PATCH 183/651] Resolve several issues --- src/Storages/StorageS3.cpp | 20 +++++++------------- src/Storages/StorageS3.h | 23 +++++------------------ 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 5d5f6ee56d2..9768653f3fe 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1016,17 +1016,14 @@ Chunk StorageS3Source::generate() if (const auto * input_format = reader.getInputFormat()) chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); + String file_name = reader.getFile(); VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - requested_virtual_columns, - reader.getPath(), - reader.getFileSize(), - reader.isArchive() ? (&reader.getFile()) : nullptr); + chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize(), reader.isArchive() ? 
(&file_name) : nullptr); return chunk; } if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getFileExtended(), total_rows_in_file); + addNumRowsToCache(reader.getPath(), total_rows_in_file); total_rows_in_file = 0; @@ -1045,9 +1042,9 @@ Chunk StorageS3Source::generate() return {}; } -void StorageS3Source::addNumRowsToCache(const String & key, size_t num_rows) +void StorageS3Source::addNumRowsToCache(const String & bucket_with_key, size_t num_rows) { - String source = fs::path(url_host_and_port) / bucket / key; + String source = fs::path(url_host_and_port) / bucket_with_key; auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); } @@ -1963,8 +1960,7 @@ public: { for (const auto & key_with_info : read_keys) { - if (auto format_from_file_name - = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->formatInferenceName())) + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(key_with_info->getFileName())) { format = format_from_file_name; break; @@ -2011,8 +2007,7 @@ public: { for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) { - if (auto format_from_file_name - = FormatFactory::instance().tryGetFormatFromFileName((*it)->formatInferenceName())) + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) { format = format_from_file_name; break; @@ -2067,7 +2062,6 @@ public: if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) { first = false; - // We do not need to use any data decompression algorithm if we take data from an archive because it will be decompressed automatically. return { wrapReadBufferWithCompressionMethod( std::move(impl), diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 7711483b186..606c677f915 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -65,8 +65,8 @@ public: std::optional path_in_archive; std::shared_ptr archive_reader; - String getPath() { return path_in_archive.has_value() ? (key + "::" + path_in_archive.value()) : key; } - String formatInferenceName() { return path_in_archive.has_value() ? path_in_archive.value() : key; } + String getPath() const { return path_in_archive.has_value() ? (key + "::" + path_in_archive.value()) : key; } + String getFileName() const { return path_in_archive.has_value() ? path_in_archive.value() : key; } }; using KeyWithInfoPtr = std::shared_ptr; @@ -266,21 +266,8 @@ private: explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - String getPath() const - { - return key_with_info->path_in_archive.has_value() - ? (bucket + "/" + key_with_info->key + "::" + key_with_info->path_in_archive.value()) - : (bucket + "/" + key_with_info->key); - } - const String & getFile() const - { - return key_with_info->path_in_archive.has_value() ? key_with_info->path_in_archive.value() : key_with_info->key; - } - String getFileExtended() const - { - return key_with_info->path_in_archive.has_value() ? 
(String{key_with_info->key} + "::" + key_with_info->path_in_archive.value()) - : key_with_info->key; - } + String getPath() const { return bucket + "/" + key_with_info->getPath(); } + String getFile() const { return key_with_info->getFileName(); } bool isArchive() { return key_with_info->path_in_archive.has_value(); } const KeyWithInfo & getKeyWithInfo() const { return *key_with_info; } std::optional getFileSize() const { return key_with_info->info ? std::optional(key_with_info->info->size) : std::nullopt; } @@ -320,7 +307,7 @@ private: ReaderHolder createReader(size_t idx = 0); std::future createReaderAsync(size_t idx = 0); - void addNumRowsToCache(const String & key, size_t num_rows); + void addNumRowsToCache(const String & bucket_with_key, size_t num_rows); std::optional tryGetNumRowsFromCache(const KeyWithInfo & key_with_info); }; From 7d0214018cf4319b1fa920d3d39a74afcdde1a29 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 13 May 2024 12:46:42 +0000 Subject: [PATCH 184/651] Enable truncate operation for object storage disks. --- src/Common/FailPoint.cpp | 3 +- src/Disks/DiskEncryptedTransaction.h | 7 +++ src/Disks/FakeDiskTransaction.h | 11 ++++ src/Disks/IDiskTransaction.h | 3 ++ .../ObjectStorages/DiskObjectStorage.cpp | 8 +++ src/Disks/ObjectStorages/DiskObjectStorage.h | 2 + .../DiskObjectStorageMetadata.cpp | 13 +++++ .../DiskObjectStorageMetadata.h | 1 + .../DiskObjectStorageTransaction.cpp | 52 +++++++++++++++++++ .../DiskObjectStorageTransaction.h | 2 + src/Disks/ObjectStorages/IMetadataStorage.h | 14 +++++ .../MetadataStorageFromDisk.cpp | 8 +++ .../ObjectStorages/MetadataStorageFromDisk.h | 2 +- ...taStorageFromDiskTransactionOperations.cpp | 37 +++++++++++++ ...dataStorageFromDiskTransactionOperations.h | 30 +++++++++++ src/Storages/StorageStripeLog.cpp | 11 ++++ .../test_log_family_s3/configs/minio.xml | 12 ----- .../configs/storage_configuration.xml | 34 ++++++++++++ tests/integration/test_log_family_s3/test.py | 38 +++++++++++++- 19 files changed, 273 insertions(+), 15 deletions(-) delete mode 100644 tests/integration/test_log_family_s3/configs/minio.xml create mode 100644 tests/integration/test_log_family_s3/configs/storage_configuration.xml diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 2434c6004ad..942ef7bc868 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -54,7 +54,8 @@ static struct InitFiu PAUSEABLE_ONCE(finish_set_quorum_failed_parts) \ PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \ PAUSEABLE(dummy_pausable_failpoint) \ - ONCE(execute_query_calling_empty_set_result_func_on_exception) + ONCE(execute_query_calling_empty_set_result_func_on_exception)\ + REGULAR(stripe_log_sink_write_fallpoint) namespace FailPoints { diff --git a/src/Disks/DiskEncryptedTransaction.h b/src/Disks/DiskEncryptedTransaction.h index 6cb2941cc11..0d440b8eafd 100644 --- a/src/Disks/DiskEncryptedTransaction.h +++ b/src/Disks/DiskEncryptedTransaction.h @@ -244,6 +244,13 @@ public: return delegate_transaction->writeFile(wrapped_path, buf_size, mode, settings); } + /// Truncate file to the target size. 
+ void truncateFile(const std::string & src_path, size_t target_size) override + { + auto wrapped_path = wrappedPath(src_path); + delegate_transaction->truncateFile(wrapped_path, target_size); + } + private: diff --git a/src/Disks/FakeDiskTransaction.h b/src/Disks/FakeDiskTransaction.h index f83642eee56..65a42481e70 100644 --- a/src/Disks/FakeDiskTransaction.h +++ b/src/Disks/FakeDiskTransaction.h @@ -2,10 +2,16 @@ #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + /// Fake disk transaction implementation. /// Just execute all operations immediately, commit is noop operation. /// No support for atomicity and rollback. @@ -134,6 +140,11 @@ public: disk.createHardLink(src_path, dst_path); } + void truncateFile(const std::string & /* src_path */, size_t /* target_size */) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Operation is not implemented"); + } + private: IDisk & disk; }; diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h index 7df1b71eb2b..1cc043da9c2 100644 --- a/src/Disks/IDiskTransaction.h +++ b/src/Disks/IDiskTransaction.h @@ -123,6 +123,9 @@ public: /// Create hardlink from `src_path` to `dst_path`. virtual void createHardLink(const std::string & src_path, const std::string & dst_path) = 0; + + /// Truncate file to the target size. + virtual void truncateFile(const std::string & src_path, size_t target_size) = 0; }; using DiskTransactionPtr = std::shared_ptr; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index c43845116dd..1543910066e 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -132,6 +132,14 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat transaction->commit(); } +void DiskObjectStorage::truncateFile(const String & path, size_t size) +{ + LOG_TEST(log, "Truncate file operation {} to size : {}", path, size); + auto transaction = createObjectStorageTransaction(); + transaction->truncateFile(path, size); + transaction->commit(); +} + void DiskObjectStorage::copyFile( /// NOLINT const String & from_file_path, IDisk & to_disk, diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 88c5e3203b8..b1b44bccb09 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -84,6 +84,8 @@ public: void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override; + void truncateFile(const String & path, size_t size) override; + MetadataStoragePtr getMetadataStorage() override { return metadata_storage; } UInt32 getRefCount(const String & path) const override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp index 19b8b51384f..44854633d65 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp @@ -15,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_FORMAT; + extern const int LOGICAL_ERROR; } void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) @@ -207,6 +208,18 @@ void DiskObjectStorageMetadata::addObject(ObjectStorageKey key, size_t size) keys_with_meta.emplace_back(std::move(key), ObjectMetadata{size, {}, {}}); } +ObjectKeyWithMetadata 
DiskObjectStorageMetadata::popLastObject() +{ + if (keys_with_meta.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't pop last object from metadata {}. Metadata already empty", metadata_file_path); + + ObjectKeyWithMetadata object = std::move(keys_with_meta.back()); + keys_with_meta.pop_back(); + total_size -= object.metadata.size_bytes; + + return object; +} + bool DiskObjectStorageMetadata::getWriteFullObjectKeySetting() { #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h index 729d93af10d..4f45f5b7ddf 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h @@ -52,6 +52,7 @@ public: void addObject(ObjectStorageKey key, size_t size); + ObjectKeyWithMetadata popLastObject(); void deserialize(ReadBuffer & buf); void deserializeFromString(const std::string & data); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index d25add625e8..1df0cd92b32 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -559,6 +559,51 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation } }; +struct TruncateFileObjectStorageOperation final : public IDiskObjectStorageOperation +{ + std::string path; + size_t size; + + TruncateFileOperationOutcomePtr truncate_outcome; + + TruncateFileObjectStorageOperation( + IObjectStorage & object_storage_, + IMetadataStorage & metadata_storage_, + const std::string & path_, + size_t size_) + : IDiskObjectStorageOperation(object_storage_, metadata_storage_) + , path(path_) + , size(size_) + {} + + std::string getInfoForLog() const override + { + return fmt::format("TruncateFileObjectStorageOperation (path: {}, size: {})", path, size); + } + + void execute(MetadataTransactionPtr tx) override + { + if (metadata_storage.exists(path)) + { + if (!metadata_storage.isFile(path)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not a file", path); + + truncate_outcome = tx->truncateFile(path,size); + } + } + + void undo() override + { + + } + + void finalize() override + { + if (!truncate_outcome->objects_to_remove.empty()) + object_storage.removeObjectsIfExist(truncate_outcome->objects_to_remove); + } +}; + } void DiskObjectStorageTransaction::createDirectory(const std::string & path) @@ -598,6 +643,13 @@ void DiskObjectStorageTransaction::moveFile(const String & from_path, const Stri })); } +void DiskObjectStorageTransaction::truncateFile(const String & path, size_t size) +{ + operations_to_execute.emplace_back( + std::make_unique(object_storage, metadata_storage, path, size) + ); +} + void DiskObjectStorageTransaction::replaceFile(const std::string & from_path, const std::string & to_path) { auto operation = std::make_unique(object_storage, metadata_storage, from_path, to_path); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h index 67044751b84..23f66990d54 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h @@ -92,6 +92,8 @@ public: void createFile(const String & path) override; + void truncateFile(const String & path, size_t size) override; + void copyFile(const std::string & from_file_path, const std::string & to_file_path, const 
ReadSettings & read_settings, const WriteSettings &) override; /// writeFile is a difficult function for transactions. diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index f95db2e1eee..feb8707e71b 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -31,7 +31,15 @@ struct UnlinkMetadataFileOperationOutcome UInt32 num_hardlinks = std::numeric_limits::max(); }; +struct TruncateFileOperationOutcome +{ + StoredObjects objects_to_remove; +}; + + using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; +using TruncateFileOperationOutcomePtr = std::shared_ptr; + /// Tries to provide some "transactions" interface, which allow /// to execute (commit) operations simultaneously. We don't provide @@ -143,6 +151,12 @@ public: return nullptr; } + virtual TruncateFileOperationOutcomePtr truncateFile(const std::string & /* path */, size_t /* size */) + { + throwNotImplemented(); + return nullptr; + } + virtual ~IMetadataTransaction() = default; private: diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index 9b9c4eb388c..a6570e58d8a 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -339,4 +339,12 @@ UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromDiskTransaction::unlink return result; } +TruncateFileOperationOutcomePtr MetadataStorageFromDiskTransaction::truncateFile(const std::string & path, size_t target_size) +{ + auto operation = std::make_unique(path, target_size, metadata_storage, *metadata_storage.getDisk()); + auto result = operation->outcome; + addOperation(std::move(operation)); + return result; +} + } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index 7059d8e9a6a..046de76e762 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -135,7 +135,7 @@ public: UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) override; - + TruncateFileOperationOutcomePtr truncateFile(const std::string & src_path, size_t target_size) override; }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp index 1357acdfc66..f2d7a1fe9dd 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp @@ -4,9 +4,12 @@ #include #include #include +#include +#include #include #include #include +#include namespace fs = std::filesystem; @@ -14,6 +17,11 @@ namespace fs = std::filesystem; namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + static std::string getTempFileName(const std::string & dir) { return fs::path(dir) / getRandomASCIIString(32); @@ -341,6 +349,35 @@ void UnlinkMetadataFileOperation::undo() outcome->num_hardlinks++; } +void TruncateMetadataFileOperation::execute(std::unique_lock & metadata_lock) +{ + if (metadata_storage.exists(path)) + { + auto metadata = metadata_storage.readMetadataUnlocked(path, metadata_lock); + while (metadata->getTotalSizeBytes() > target_size) + { + auto object_key_with_metadata = metadata->popLastObject(); + outcome->objects_to_remove.emplace_back(object_key_with_metadata.key.serialize(), path, 
object_key_with_metadata.metadata.size_bytes); + } + if (metadata->getTotalSizeBytes() != target_size) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} can't be truncated to size {}", path, target_size); + } + LOG_TEST(getLogger("TruncateMetadataFileOperation"), "Going to remove {} blobs.", outcome->objects_to_remove.size()); + + write_operation = std::make_unique(path, disk, metadata->serializeToString()); + + write_operation->execute(metadata_lock); + } +} + +void TruncateMetadataFileOperation::undo() +{ + if (write_operation) + write_operation->undo(); +} + + void SetReadonlyFileOperation::execute(std::unique_lock & metadata_lock) { auto metadata = metadata_storage.readMetadataUnlocked(path, metadata_lock); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index e8fda177b95..c1ad2882d19 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -292,4 +292,34 @@ private: std::unique_ptr write_operation; }; +struct TruncateMetadataFileOperation final : public IMetadataOperation +{ + const TruncateFileOperationOutcomePtr outcome = std::make_shared(); + + TruncateMetadataFileOperation( + const std::string & path_, + size_t target_size_, + const MetadataStorageFromDisk & metadata_storage_, + IDisk & disk_) + : path(path_) + , target_size(target_size_) + , metadata_storage(metadata_storage_) + , disk(disk_) + { + } + + void execute(std::unique_lock & metadata_lock) override; + + void undo() override; + +private: + std::string path; + size_t target_size; + + const MetadataStorageFromDisk & metadata_storage; + IDisk & disk; + + std::unique_ptr write_operation; +}; + } diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index f47eeb60918..4f5e7ba3f4d 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -53,8 +54,13 @@ namespace ErrorCodes extern const int TIMEOUT_EXCEEDED; extern const int CANNOT_RESTORE_TABLE; extern const int NOT_IMPLEMENTED; + extern const int FAULT_INJECTED; } +namespace FailPoints +{ + extern const char stripe_log_sink_write_fallpoint[]; +} /// NOTE: The lock `StorageStripeLog::rwlock` is NOT kept locked while reading, /// because we read ranges of data that do not change. @@ -234,6 +240,11 @@ public: /// Save the new indices. storage.saveIndices(lock); + // While saving the file sizes an exception might occur, S3::TooManyRequests for example. + fiu_do_on(FailPoints::stripe_log_sink_write_fallpoint, + { + throw Exception(ErrorCodes::FAULT_INJECTED, "Injecting fault for inserting into StripeLog table"); + }); /// Save the new file sizes. 
storage.saveFileSizes(lock); diff --git a/tests/integration/test_log_family_s3/configs/minio.xml b/tests/integration/test_log_family_s3/configs/minio.xml deleted file mode 100644 index 58771d6b284..00000000000 --- a/tests/integration/test_log_family_s3/configs/minio.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - s3 - http://minio1:9001/root/data/ - minio - minio123 - - - - diff --git a/tests/integration/test_log_family_s3/configs/storage_configuration.xml b/tests/integration/test_log_family_s3/configs/storage_configuration.xml new file mode 100644 index 00000000000..d479a59b197 --- /dev/null +++ b/tests/integration/test_log_family_s3/configs/storage_configuration.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + true + + 1 + 0 + 1 + 20000 + + + + + +
+                        <disk>s3_no_retries</disk>
+                    </main>
+                </volumes>
+            </s3_no_retries>
+        </policies>
+    </storage_configuration>
+</clickhouse>
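The failpoint test in the next file relies on the truncate mechanics added above: on object storage a logical file is described by a metadata file listing the blobs that hold its data, so truncating it pops whole blobs off the tail of that list, and the blobs themselves are removed from the object store only in finalize(), after the transaction commits. A minimal Python sketch of that pop-last-object logic (illustrative names, not ClickHouse code):

```python
# Sketch of the logic in TruncateMetadataFileOperation::execute / popLastObject.
# Assumed simplification: a metadata file is an ordered list of (key, size) blobs.

def truncate_metadata(objects, target_size):
    """Return (remaining, to_remove); fail unless target_size is a blob boundary."""
    remaining = list(objects)
    to_remove = []
    total = sum(size for _, size in remaining)
    while total > target_size:
        if not remaining:
            raise RuntimeError("can't pop last object, metadata already empty")
        key, size = remaining.pop()  # popLastObject(): drop the newest blob
        to_remove.append((key, size))
        total -= size
    if total != target_size:
        # mirrors the LOGICAL_ERROR "File {} can't be truncated to size {}"
        raise RuntimeError(f"file can't be truncated to size {target_size}")
    # The caller rewrites the metadata file first (undoable via undo()) and
    # deletes the blobs in to_remove from the object store only on finalize().
    return remaining, to_remove


# A StripeLog insert appends whole blobs, so rolling back to the size recorded
# before a failed insert always lands on a blob boundary:
remaining, removed = truncate_metadata([("blob_0", 100), ("blob_1", 60)], 100)
assert remaining == [("blob_0", 100)] and removed == [("blob_1", 60)]
```

This is the recovery path the test's comment points at: FileChecker::repair() cuts the data file back to the last recorded size, which on an object-storage disk only ever removes whole blobs.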
diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index bed379d098b..ed84bdf48e6 100644 --- a/tests/integration/test_log_family_s3/test.py +++ b/tests/integration/test_log_family_s3/test.py @@ -11,7 +11,7 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/minio.xml", "configs/ssl.xml"], + main_configs=["configs/storage_configuration.xml", "configs/ssl.xml"], with_minio=True, ) logging.info("Starting cluster...") @@ -84,3 +84,39 @@ def test_log_family_s3(cluster, log_engine, files_overhead, files_overhead_per_i assert_objects_count(cluster, 0) finally: node.query("DROP TABLE s3_test") + + +# Imitate the case when an error occurs while inserting into the table. +# For example S3::TooManyRequests. +# In that case we can update the data file, but not the size file. +# So due to the exception we should truncate the data file to undo the insert query. +# See FileChecker::repair(). +def test_stripe_log_truncate(cluster): + node = cluster.instances["node"] + + node.query( + """ + CREATE TABLE stripe_table ( + a int + ) ENGINE = StripeLog() + SETTINGS storage_policy='s3_no_retries' + """ + ) + + node.query("SYSTEM ENABLE FAILPOINT stripe_log_sink_write_fallpoint") + node.query( + """ + INSERT INTO stripe_table SELECT number FROM numbers(10) + """, + ignore_error=True, + ) + node.query("SYSTEM DISABLE FAILPOINT stripe_log_sink_write_fallpoint") + assert node.query("SELECT count(*) FROM stripe_table") == "0\n" + node.query("INSERT INTO stripe_table SELECT number FROM numbers(10)") + assert node.query("SELECT count(*) FROM stripe_table") == "10\n" + + # Make sure that everything is okay with the table after restart. + node.query("DETACH TABLE stripe_table") + node.query("ATTACH TABLE stripe_table") + + assert node.query("DROP TABLE stripe_table") == "" From 4653ec618d117f840cec5ba8c6d95895f0bbf4af Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 13 May 2024 13:43:47 +0000 Subject: [PATCH 185/651] Add more tests and documentation, fix existing tests and special build --- docs/en/sql-reference/data-types/dynamic.md | 86 ++++++++- src/Columns/ColumnDynamic.cpp | 7 + src/DataTypes/DataTypeDynamic.h | 2 +- ...9_dynamic_all_merge_algorithms_1.reference | 14 +- ...
=> 03040_dynamic_type_alters_1.reference} | 0 ...ters.sh => 03040_dynamic_type_alters_1.sh} | 3 +- .../03040_dynamic_type_alters_2.reference | 182 ++++++++++++++++++ .../03040_dynamic_type_alters_2.sh | 57 ++++++ .../03041_dynamic_type_check_table.reference | 56 ++++++ .../03041_dynamic_type_check_table.sh | 45 +++++ 10 files changed, 442 insertions(+), 10 deletions(-) rename tests/queries/0_stateless/{03040_dynamic_type_alters.reference => 03040_dynamic_type_alters_1.reference} (100%) rename tests/queries/0_stateless/{03040_dynamic_type_alters.sh => 03040_dynamic_type_alters_1.sh} (57%) create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_2.reference create mode 100755 tests/queries/0_stateless/03040_dynamic_type_alters_2.sh create mode 100644 tests/queries/0_stateless/03041_dynamic_type_check_table.reference create mode 100755 tests/queries/0_stateless/03041_dynamic_type_check_table.sh diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index e3cade25b55..a2c8ba532ce 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -261,7 +261,7 @@ SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM te └─────────┴────────────────┴─────────┴─────────────────┘ ``` -## Reading Variant type from the data +## Reading Dynamic type from the data All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) supports reading `Dynamic` type. During data parsing ClickHouse tries to infer the type of each value and use it during insertion to `Dynamic` column. @@ -409,3 +409,87 @@ SELECT d, dynamicType(d) FROM test ORDER by d; └─────┴────────────────┘ ``` +## Reaching the limit in number of different data types stored inside Dynamic + +`Dynamic` data type can store only limited number of different data types inside. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 1 and 255 (due to implementation details, it's impossible to have more than 255 different data types inside Dynamic). +When the limit is reached, all new data types inserted to `Dynamic` column will be casted to `String` and stored as `String` values. + +Let's see what happens when the limit is reached in different scenarios. + +### Reaching the limit during data parsing + +During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted as `String` values: + +```sql +SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', ' +{"d" : 42} +{"d" : [1, 2, 3]} +{"d" : "Hello, World!"} +{"d" : "2020-01-01"} +{"d" : ["str1", "str2", "str3"]} +{"d" : {"a" : 1, "b" : [1, 2, 3]}} +') +``` + +```text +┌─d──────────────────────────┬─dynamicType(d)─┐ +│ 42 │ Int64 │ +│ [1,2,3] │ Array(Int64) │ +│ Hello, World! │ String │ +│ 2020-01-01 │ String │ +│ ["str1", "str2", "str3"] │ String │ +│ {"a" : 1, "b" : [1, 2, 3]} │ String │ +└────────────────────────────┴────────────────┘ +``` + +As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`. + +### During merges of data parts in MergeTree table engines + +During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts. 
+In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation. + +Let's see an example of such merge. First, let's create a table with `Dynamic` column, set the limit of different data types to `3` and insert values with `5` different types: + +```sql +CREATE TABLE test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree ORDER BY id; +SYSTEM STOP MERGES test; +INSERT INTO test SELECT number, number FROM numbers(5); +INSERT INTO test SELECT number, range(number) FROM numbers(4); +INSERT INTO test SELECT number, toDate(number) FROM numbers(3); +INSERT INTO test SELECT number, map(number, number) FROM numbers(2); +INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1); +``` + +Each insert will create a separate data part with `Dynamic` column containing a single type: +```sql +SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +``` + +```text +┌─count()─┬─dynamicType(d)──────┬─_part─────┐ +│       5 │ UInt64              │ all_1_1_0 │ +│       4 │ Array(UInt64)       │ all_2_2_0 │ +│       3 │ Date                │ all_3_3_0 │ +│       2 │ Map(UInt64, UInt64) │ all_4_4_0 │ +│       1 │ String              │ all_5_5_0 │ +└─────────┴─────────────────────┴───────────┘ +``` + +Now, let's merge all parts into one and see what will happen: + +```sql +SYSTEM START MERGES test; +OPTIMIZE TABLE test FINAL; +SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +``` + +```text +┌─count()─┬─dynamicType(d)─┬─_part─────┐ +│       5 │ UInt64         │ all_1_5_2 │ +│       6 │ String         │ all_1_5_2 │ +│       4 │ Array(UInt64)  │ all_1_5_2 │ +└─────────┴────────────────┴───────────┘ +``` + +As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`. diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 76f536a3409..0f247638d92 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -290,6 +290,13 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size /// We cannot combine 2 Variant types as total number of variants exceeds the limit. /// In this case we will add most frequent variants from this range and insert them as usual, /// all other variants will be converted to String. + /// TODO: instead of keeping all current variants and just adding new most frequent variants + /// from source columns we can also try to replace rarest existing variants with frequent + /// variants from source column (so we will avoid casting new frequent variants to String + /// and keeping rare existing ones). It will require rewriting of existing data in Variant + /// column but will improve usability of Dynamic column for example during squashing blocks + /// during insert. + const auto & src_variant_column = dynamic_src.getVariantColumn(); /// Calculate ranges for each variant in current range. 
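To make the selection rule above concrete, here is a rough Python model that reproduces the counts from the documentation's merge example (5 UInt64, 6 String, 4 Array(UInt64)). Reserving one of the max_types slots for String, the overflow target, is an assumption chosen to match that example; as both the documentation and the comment in ColumnDynamic::insertRangeFrom say, the exact choice depends on the implementation:

```python
# Illustrative model only, not the actual ClickHouse algorithm (which works on
# Variant discriminators inside the column). str(v) stands in for casting a
# value to its String representation.
from collections import Counter


def merge_dynamic_types(values, max_types):
    """values: list of (type_name, value) pairs from the parts being merged."""
    freq = Counter(t for t, _ in values)
    if len(freq) <= max_types:
        return list(values)
    # Assumption: keep String plus the most frequent of the other types, and
    # stringify every value of the remaining, rarer types.
    non_string = Counter({t: n for t, n in freq.items() if t != "String"})
    kept = {t for t, _ in non_string.most_common(max_types - 1)} | {"String"}
    return [(t, v) if t in kept else ("String", str(v)) for t, v in values]


rows = (
    [("UInt64", n) for n in range(5)]
    + [("Array(UInt64)", list(range(n))) for n in range(4)]
    + [("Date", "2020-01-0%d" % (n + 1)) for n in range(3)]
    + [("Map(UInt64, UInt64)", {n: n}) for n in range(2)]
    + [("String", "str_0")]
)
counts = Counter(t for t, _ in merge_dynamic_types(rows, max_types=3))
assert counts == {"UInt64": 5, "String": 6, "Array(UInt64)": 4}
```

The model keeps the two most frequent non-String types and funnels Date and Map values into String, which is exactly the distribution the merged part shows above.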
diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h index bd3d822fbb6..d5e4c5261ce 100644 --- a/src/DataTypes/DataTypeDynamic.h +++ b/src/DataTypes/DataTypeDynamic.h @@ -12,7 +12,7 @@ class DataTypeDynamic final : public IDataType public: static constexpr bool is_parametric = true; - DataTypeDynamic(size_t max_dynamic_types_ = DEFAULT_MAX_DYNAMIC_TYPES); + explicit DataTypeDynamic(size_t max_dynamic_types_ = DEFAULT_MAX_DYNAMIC_TYPES); TypeIndex getTypeId() const override { return TypeIndex::Dynamic; } const char * getFamilyName() const override { return "Dynamic"; } diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference index a7fbbabcd46..4b4a1e2ab51 100644 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference @@ -1,12 +1,12 @@ MergeTree compact + horizontal merge ReplacingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 UInt64 100000 String SummingMergeTree -100000 UInt64 100000 String +100000 UInt64 200000 1 50000 String 100000 UInt64 @@ -22,8 +22,8 @@ AggregatingMergeTree 100000 1 MergeTree wide + horizontal merge ReplacingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 UInt64 100000 String SummingMergeTree @@ -49,16 +49,16 @@ ReplacingMergeTree 50000 UInt64 100000 String SummingMergeTree -100000 UInt64 100000 String +100000 UInt64 200000 1 50000 String 100000 UInt64 50000 2 100000 1 AggregatingMergeTree -100000 UInt64 100000 String +100000 UInt64 200000 1 50000 String 100000 UInt64 @@ -66,8 +66,8 @@ AggregatingMergeTree 100000 1 MergeTree wide + vertical merge ReplacingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 UInt64 100000 String SummingMergeTree @@ -79,8 +79,8 @@ SummingMergeTree 50000 2 100000 1 AggregatingMergeTree -100000 UInt64 100000 String +100000 UInt64 200000 1 50000 String 100000 UInt64 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference similarity index 100% rename from tests/queries/0_stateless/03040_dynamic_type_alters.reference rename to tests/queries/0_stateless/03040_dynamic_type_alters_1.reference diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh similarity index 57% rename from tests/queries/0_stateless/03040_dynamic_type_alters.sh rename to tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index a20a92712e0..1f2a6a31ad7 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --stacktrace --max_insert_threads 3 --group_by_two_level_threshold 1000000 --group_by_two_level_threshold_bytes 42526602 --distributed_aggregation_memory_efficient 1 --fsync_metadata 1 --output_format_parallel_formatting 0 --input_format_parallel_parsing 0 --min_chunk_bytes_for_parallel_parsing 8125230 --max_read_buffer_size 859505 --prefer_localhost_replica 1 --max_block_size 34577 --max_threads 41 --optimize_append_index 0 --optimize_if_chain_to_multiif 1 --optimize_if_transform_strings_to_enum 1 --optimize_read_in_order 1 --optimize_or_like_chain 0 --optimize_substitute_columns 1 --enable_multiple_prewhere_read_steps 1 --read_in_order_two_level_merge_threshold 99 --optimize_aggregation_in_order 1 --aggregation_in_order_max_block_bytes 27635208 --use_uncompressed_cache 0 --min_bytes_to_use_direct_io 10737418240 --min_bytes_to_use_mmap_io 6451111320 --local_filesystem_read_method pread --remote_filesystem_read_method read --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 50 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 --throw_on_error_from_cache_on_write_operations 0 --remote_filesystem_read_prefetch 1 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 64Mi --filesystem_prefetches_limit 10 --filesystem_prefetch_min_bytes_for_single_read_task 16Mi --filesystem_prefetch_step_marks 0 --filesystem_prefetch_step_bytes 100Mi --compile_aggregate_expressions 0 --compile_sort_description 1 --merge_tree_coarse_index_granularity 32 --optimize_distinct_in_order 0 --max_bytes_before_external_sort 10737418240 --max_bytes_before_external_group_by 10737418240 --max_bytes_before_remerge_sort 1374192967 --min_compress_block_size 2152247 --max_compress_block_size 1830907 --merge_tree_compact_parts_min_granules_to_multibuffer_read 79 --optimize_sorting_by_input_stream_properties 1 --http_response_buffer_size 106072 --http_wait_end_of_query True --enable_memory_bound_merging_of_aggregation_results 0 --min_count_to_compile_expression 0 --min_count_to_compile_aggregate_expression 3 --min_count_to_compile_sort_description 3 --session_timezone Africa/Khartoum --prefer_warmed_unmerged_parts_seconds 4 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.03 --ratio_of_defaults_for_sparse_serialization 0.9779014012142565 --prefer_fetch_merged_part_size_threshold 4254002758 --vertical_merge_algorithm_min_rows_to_activate 1 --vertical_merge_algorithm_min_columns_to_activate 1 --allow_vertical_merges_from_compact_to_wide_parts 1 --min_merge_bytes_to_use_direct_io 1 --index_granularity_bytes 4982992 --merge_max_block_size 16662 --index_granularity 22872 --min_bytes_for_wide_part 1073741824 --compress_marks 0 --compress_primary_key 0 --marks_compress_block_size 86328 --primary_key_compress_block_size 64101 --replace_long_file_name_to_hash 0 --max_file_name_length 81 --min_bytes_for_full_part_storage 536870912 --compact_parts_max_bytes_to_buffer 480908080 --compact_parts_max_granules_to_buffer 1 --compact_parts_merge_max_bytes_to_prefetch_part 4535313 --cache_populated_by_fetch 0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" function run() 
{ @@ -74,3 +74,4 @@ echo "MergeTree wide" $CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" run $CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference new file mode 100644 index 00000000000..18a181464e9 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -0,0 +1,182 @@ +MergeTree compact +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +MergeTree wide +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N 
\N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh new file mode 100755 index 00000000000..6491e64372f --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column" + $CH_CLIENT -q "alter table test add column d Dynamic settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter rename column 1" + $CH_CLIENT -q "alter table test rename column d to d1 settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" + $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert nested dynamic" + $CH_CLIENT -q "insert into test select number, number, [number % 2 ? number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3)" + $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" + $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a, d1.\`Array(Dynamic)\`.UInt64, d1.\`Array(Dynamic)\`.String, d1.\`Array(Dynamic)\`.Date from test order by x" + + echo "alter rename column 2" + $CH_CLIENT -q "alter table test rename column d1 to d2 settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2)" + $CH_CLIENT -q "select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.\`Tuple(a UInt64)\`.a, d2.\`Array(Dynamic)\`.UInt64, d2.\`Array(Dynamic)\`.String, d2.\`Array(Dynamic)\`.Date, from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference new file mode 100644 index 00000000000..b1ea186a917 --- /dev/null +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -0,0 +1,56 @@ +MergeTree compact +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column +4 String +4 UInt64 +7 
None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +check table +1 +MergeTree wide +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +check table +1 diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh new file mode 100755 index 00000000000..3d802485be3 --- /dev/null +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "check table" + $CH_CLIENT -q "check table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + From 997f2742d7addf4ed8bc1c9171c5f7d9712977bd Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 13 May 2024 15:46:25 +0200 Subject: [PATCH 186/651] fix test --- .../test_replicated_database/test.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py 
b/tests/integration/test_replicated_database/test.py index 801aac90292..643b8ec154d 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -47,9 +47,8 @@ snapshotting_node = cluster.add_instance( snapshot_recovering_node = cluster.add_instance( "snapshot_recovering_node", main_configs=["configs/config.xml"], - user_configs=["configs/inconsistent_settings.xml"], + user_configs=["configs/settings.xml"], with_zookeeper=True, - macros={"shard": 1, "replica": 4}, ) all_nodes = [ @@ -60,6 +59,14 @@ all_nodes = [ snapshot_recovering_node, ] +bad_settings_node = cluster.add_instance( + "snapshot_recovering_node", + main_configs=["configs/config.xml"], + user_configs=["configs/inconsistent_settings.xml"], + with_zookeeper=True, + macros={"shard": 1, "replica": 4}, +) + uuid_regex = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") @@ -1448,12 +1455,12 @@ def test_table_metadata_corruption(started_cluster): def test_auto_recovery(started_cluster): dummy_node.query("DROP DATABASE IF EXISTS auto_recovery") - snapshot_recovering_node.query("DROP DATABASE IF EXISTS auto_recovery") + bad_settings_node.query("DROP DATABASE IF EXISTS auto_recovery") dummy_node.query( "CREATE DATABASE auto_recovery ENGINE = Replicated('/clickhouse/databases/auto_recovery', 'shard1', 'replica1');" ) - snapshot_recovering_node.query( + bad_settings_node.query( "CREATE DATABASE auto_recovery ENGINE = Replicated('/clickhouse/databases/auto_recovery', 'shard1', 'replica2') SETTINGS max_retries_before_automatic_recovery=3;" ) @@ -1463,7 +1470,7 @@ def test_auto_recovery(started_cluster): dummy_node.query("INSERT INTO auto_recovery.t1 SELECT 42") # dummy_node has 0 (default is 1), # so it will consider that the setting is changed, and will write it to the DDL entry - # snapshot_recovering_node has implicit_transaction=1, so it will fail and recover from snapshot + # bad_settings_node has implicit_transaction=1, so it will fail and recover from snapshot dummy_node.query( "CREATE TABLE auto_recovery.t2 (n int) ENGINE=ReplicatedMergeTree ORDER BY tuple()", settings={ @@ -1477,20 +1484,18 @@ def test_auto_recovery(started_cluster): settings={"distributed_ddl_task_timeout": 0}, ) - snapshot_recovering_node.query( + bad_settings_node.query( "SYSTEM SYNC DATABASE REPLICA auto_recovery", settings={"receive_timeout": 60} ) - assert snapshot_recovering_node.contains_in_log( + assert bad_settings_node.contains_in_log( "Unexpected error (3 times in a row), will try to restart main thread" ) - assert snapshot_recovering_node.contains_in_log( - "Cannot begin an implicit transaction" - ) - snapshot_recovering_node.query("SYSTEM SYNC REPLICA auto_recovery.t1") - snapshot_recovering_node.query("SYSTEM SYNC REPLICA auto_recovery.t2") + assert bad_settings_node.contains_in_log("Cannot begin an implicit transaction") + bad_settings_node.query("SYSTEM SYNC REPLICA auto_recovery.t1") + bad_settings_node.query("SYSTEM SYNC REPLICA auto_recovery.t2") assert "42\n" == dummy_node.query("SELECT * FROM auto_recovery.t2") assert "137\n" == dummy_node.query("SELECT * FROM auto_recovery.t1") - assert "42\n" == snapshot_recovering_node.query("SELECT * FROM auto_recovery.t2") - assert "137\n" == snapshot_recovering_node.query("SELECT * FROM auto_recovery.t1") + assert "42\n" == bad_settings_node.query("SELECT * FROM auto_recovery.t2") + assert "137\n" == bad_settings_node.query("SELECT * FROM auto_recovery.t1") From e2b39db24241fef6f5651f342531b563933d4f74 Mon Sep 17 
00:00:00 2001
From: MikhailBurdukov
Date: Mon, 13 May 2024 14:14:35 +0000
Subject: [PATCH 187/651] Fix

---
 .../MetadataStorageFromDiskTransactionOperations.cpp | 4 ++--
 .../MetadataStorageFromDiskTransactionOperations.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
index b45ac20f9d2..79d1f4a1f7c 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
@@ -371,10 +371,10 @@ void TruncateMetadataFileOperation::execute(std::unique_lock<SharedMutex> & meta
     }
 }

-void TruncateMetadataFileOperation::undo()
+void TruncateMetadataFileOperation::undo(std::unique_lock<SharedMutex> & lock)
 {
     if (write_operation)
-        write_operation->undo();
+        write_operation->undo(lock);
 }

diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h
index 166c5329d39..26f9f6460a4 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h
@@ -300,7 +300,7 @@ struct TruncateMetadataFileOperation final : public IMetadataOperation

     void execute(std::unique_lock<SharedMutex> & metadata_lock) override;

-    void undo() override;
+    void undo(std::unique_lock<SharedMutex> & lock) override;

 private:
     std::string path;

From 86406c9ac15d4438f257e0aa6b2ca75ea0750add Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 13 May 2024 14:43:32 +0000
Subject: [PATCH 188/651] Fix build

---
 src/DataTypes/Serializations/SerializationDynamic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h
index 4803bc25d18..7471ff54cf7 100644
--- a/src/DataTypes/Serializations/SerializationDynamic.h
+++ b/src/DataTypes/Serializations/SerializationDynamic.h
@@ -105,7 +105,7 @@ private:
     {
         DynamicStructureSerializationVersion structure_version;
         DataTypePtr variant_type;
-        ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ};
+        ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ, .data = {}};

         explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) : structure_version(structure_version_) {}
     };

From 904800afc8e77bc5567ba2096258aec4802d8cee Mon Sep 17 00:00:00 2001
From: kssenii
Date: Mon, 13 May 2024 17:44:14 +0200
Subject: [PATCH 189/651] Apply recent changes to storages3/hdfs/azure

---
 .../ObjectStorages/S3/S3ObjectStorage.cpp     |  2 +-
 .../ObjectStorage/StorageObjectStorage.cpp    | 29 ++++++++++++-------
 .../ObjectStorage/StorageObjectStorage.h      |  3 +-
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index 74707b61238..c24874d0a94 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -592,7 +592,7 @@ std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(
     ContextPtr context)
 {
     auto new_s3_settings = getSettings(config, config_prefix, context);
-    auto new_client = getClient(config, config_prefix, context, *new_s3_settings);
+    auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true);

     auto new_uri{uri};
     new_uri.bucket = new_namespace;
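The next hunks split the old withWildcard() check in two: withPartitionWildcard() asks whether the {_partition_id} placeholder is present, while withGlobsIgnorePartitionWildcard() asks whether glob characters remain once that placeholder is substituted away, so a partitioned write target is no longer mistaken for a read-only glob path. A minimal standalone sketch of that distinction, assuming plain std::string inputs instead of the real Configuration class (the free functions below are illustrative, not ClickHouse API):

#include <iostream>
#include <string>

static const std::string PARTITION_ID_WILDCARD = "{_partition_id}";

// True if the path is a partitioned write target.
bool withPartitionWildcard(const std::string & path)
{
    return path.find(PARTITION_ID_WILDCARD) != std::string::npos;
}

// True if glob characters remain after dropping the partition placeholder;
// only such paths should force the table into read-only mode.
bool withGlobsIgnorePartitionWildcard(std::string path)
{
    size_t pos;
    while ((pos = path.find(PARTITION_ID_WILDCARD)) != std::string::npos)
        path.erase(pos, PARTITION_ID_WILDCARD.size());
    return path.find_first_of("*?{") != std::string::npos;
}

int main()
{
    std::cout << withPartitionWildcard("data/{_partition_id}/file.csv") << '\n';            // 1: partitioned insert target
    std::cout << withGlobsIgnorePartitionWildcard("data/{_partition_id}/file.csv") << '\n'; // 0: still writable
    std::cout << withGlobsIgnorePartitionWildcard("data/*/{_partition_id}.csv") << '\n';    // 1: genuine glob, read-only
}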
diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
index a187a8fc54d..01790760747 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
@@ -206,7 +206,7 @@ void StorageObjectStorage::read(
     size_t num_streams)
 {
     updateConfiguration(local_context);
-    if (partition_by && configuration->withWildcard())
+    if (partition_by && configuration->withPartitionWildcard())
     {
         throw Exception(ErrorCodes::NOT_IMPLEMENTED,
                         "Reading from a partitioned {} storage is not implemented yet",
@@ -247,7 +247,14 @@ SinkToStoragePtr StorageObjectStorage::write(
     const auto sample_block = metadata_snapshot->getSampleBlock();
     const auto & settings = configuration->getQuerySettings(local_context);

-    if (configuration->withWildcard())
+    if (configuration->withGlobsIgnorePartitionWildcard())
+    {
+        throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED,
+                        "Path '{}' contains globs, so the table is in readonly mode",
+                        configuration->getPath());
+    }
+
+    if (configuration->withPartitionWildcard())
     {
         ASTPtr partition_by_ast = nullptr;
         if (auto insert_query = std::dynamic_pointer_cast<ASTInsertQuery>(query))
@@ -265,14 +272,6 @@ SinkToStoragePtr StorageObjectStorage::write(
         }
     }

-    if (configuration->withGlobs())
-    {
-        throw Exception(
-            ErrorCodes::DATABASE_ACCESS_DENIED,
-            "{} key '{}' contains globs, so the table is in readonly mode",
-            getName(), configuration->getPath());
-    }
-
     auto paths = configuration->getPaths();
     if (auto new_key = checkAndGetNewFileOnInsertIfNeeded(
             *object_storage, *configuration, settings, paths.front(), paths.size()))
@@ -428,13 +427,21 @@ StorageObjectStorage::Configuration::Configuration(const Configuration & other)
     structure = other.structure;
 }

-bool StorageObjectStorage::Configuration::withWildcard() const
+bool StorageObjectStorage::Configuration::withPartitionWildcard() const
 {
     static const String PARTITION_ID_WILDCARD = "{_partition_id}";
     return getPath().find(PARTITION_ID_WILDCARD) != String::npos
         || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos;
 }

+bool StorageObjectStorage::Configuration::withGlobsIgnorePartitionWildcard() const
+{
+    if (!withPartitionWildcard())
+        return withGlobs();
+    else
+        return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos;
+}
+
 bool StorageObjectStorage::Configuration::isPathWithGlobs() const
 {
     return getPath().find_first_of("*?{") != std::string::npos;
diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h
index 3f8ff79ad54..a396bad9d6e 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.h
@@ -163,8 +163,9 @@ public:
     virtual void addStructureAndFormatToArgs(
         ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0;

-    bool withWildcard() const;
+    bool withPartitionWildcard() const;
     bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); }
+    bool withGlobsIgnorePartitionWildcard() const;
     bool isPathWithGlobs() const;
     bool isNamespaceWithGlobs() const;
     virtual std::string getPathWithoutGlobs() const;

From 7543cd372c555a660c0d8149c68b45ab5f9bd7f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9A=D0=B8=D1=80=D0=B8=D0=BB=D0=BB=20=D0=93=D0=B0=D1=80?=
 =?UTF-8?q?=D0=B1=D0=B0=D1=80?=
Date: Mon, 13 May 2024 18:52:25 +0300
Subject: [PATCH 190/651] Move test to another file

---
 .../configs/config.d/clusters.xml             |  2 -
...sters_unusual.xml => clusters_zk_path.xml} | 2 +- .../test_modify_engine_on_restart/test.py | 53 ++---------------- .../{test_unusual_path.py => test_zk_path.py} | 55 +++++++++++++++++-- 4 files changed, 55 insertions(+), 57 deletions(-) rename tests/integration/test_modify_engine_on_restart/configs/config.d/{clusters_unusual.xml => clusters_zk_path.xml} (80%) rename tests/integration/test_modify_engine_on_restart/{test_unusual_path.py => test_zk_path.py} (52%) diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml index fbcf6499ec1..c8bbb7f3530 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml @@ -15,8 +15,6 @@ -/clickhouse/tables/{database}/{table}/{uuid} - 01 diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml similarity index 80% rename from tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml rename to tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml index 812291335b8..ba13cd87031 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml @@ -15,6 +15,6 @@ 01
-/lol/kek/'/{uuid} +/clickhouse/'/{database}/{table}/{uuid} diff --git a/tests/integration/test_modify_engine_on_restart/test.py b/tests/integration/test_modify_engine_on_restart/test.py index 81854340752..289b25dd89e 100644 --- a/tests/integration/test_modify_engine_on_restart/test.py +++ b/tests/integration/test_modify_engine_on_restart/test.py @@ -1,9 +1,5 @@ import pytest -from test_modify_engine_on_restart.common import ( - check_flags_deleted, - set_convert_flags, - get_table_path, -) +from test_modify_engine_on_restart.common import check_flags_deleted, set_convert_flags from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -40,8 +36,8 @@ def started_cluster(): cluster.shutdown() -def q(node, query, database=database_name): - return node.query(database=database, sql=query) +def q(node, query): + return node.query(database=database_name, sql=query) def create_tables(): @@ -124,7 +120,7 @@ def check_replica_added(): q( ch2, - f"CREATE TABLE rmt ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database_name}/rmt/{uuid}', '{{replica}}') PARTITION BY toYYYYMM(D) ORDER BY A", + f"CREATE TABLE rmt ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{uuid}/{{shard}}', '{{replica}}') PARTITION BY toYYYYMM(D) ORDER BY A", ) ch2.query(database=database_name, sql="SYSTEM SYNC REPLICA rmt", timeout=20) @@ -140,7 +136,7 @@ def check_replica_added(): def test_modify_engine_on_restart(started_cluster): - ch1.query("CREATE DATABASE IF NOT EXISTS " + database_name + " ON CLUSTER cluster") + ch1.query("CREATE DATABASE " + database_name + " ON CLUSTER cluster") create_tables() @@ -163,42 +159,3 @@ def test_modify_engine_on_restart(started_cluster): ch1.restart_clickhouse() check_tables(True) - - -def test_modify_engine_fails_if_zk_path_exists(started_cluster): - database_name = "zk_path" - ch1.query("CREATE DATABASE " + database_name + " ON CLUSTER cluster") - - q( - ch1, - "CREATE TABLE already_exists_1 ( A Int64, D Date, S String ) ENGINE MergeTree() PARTITION BY toYYYYMM(D) ORDER BY A;", - database_name, - ) - uuid = q( - ch1, - f"SELECT uuid FROM system.tables WHERE table = 'already_exists_1' and database = '{database_name}'", - database_name, - ).strip("'[]\n") - - q( - ch1, - f"CREATE TABLE already_exists_2 ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database_name}/already_exists_1/{uuid}', 'r2') PARTITION BY toYYYYMM(D) ORDER BY A;", - database_name, - ) - - set_convert_flags(ch1, database_name, ["already_exists_1"]) - - table_data_path = get_table_path(ch1, "already_exists_1", database_name) - - ch1.stop_clickhouse() - ch1.start_clickhouse(retry_start=False, expected_to_fail=True) - - # Check if we can cancel convertation - ch1.exec_in_container( - [ - "bash", - "-c", - f"rm {table_data_path}convert_to_replicated", - ] - ) - ch1.start_clickhouse() diff --git a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py b/tests/integration/test_modify_engine_on_restart/test_zk_path.py similarity index 52% rename from tests/integration/test_modify_engine_on_restart/test_unusual_path.py rename to tests/integration/test_modify_engine_on_restart/test_zk_path.py index e82f48e8b34..8bbfe64240f 100644 --- a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py +++ b/tests/integration/test_modify_engine_on_restart/test_zk_path.py @@ -1,12 +1,16 @@ import pytest -from test_modify_engine_on_restart.common import check_flags_deleted, set_convert_flags +from 
test_modify_engine_on_restart.common import ( + check_flags_deleted, + get_table_path, + set_convert_flags, +) from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) ch1 = cluster.add_instance( "ch1", main_configs=[ - "configs/config.d/clusters_unusual.xml", + "configs/config.d/clusters_zk_path.xml", "configs/config.d/distributed_ddl.xml", ], with_zookeeper=True, @@ -27,8 +31,8 @@ def started_cluster(): cluster.shutdown() -def q(node, query): - return node.query(database=database_name, sql=query) +def q(node, query, database=database_name): + return node.query(database=database, sql=query) def create_tables(): @@ -63,7 +67,7 @@ def check_tables(): ) .strip() .startswith( - "ReplicatedReplacingMergeTree(\\'/lol/kek/\\\\\\'/{uuid}\\', \\'{replica}\\', D)" + "ReplicatedReplacingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', D)" ) ) assert ( @@ -73,7 +77,7 @@ def check_tables(): ) .strip() .startswith( - "ReplicatedVersionedCollapsingMergeTree(\\'/lol/kek/\\\\\\'/{uuid}\\', \\'{replica}\\', Sign, Version)" + "ReplicatedVersionedCollapsingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', Sign, Version)" ) ) @@ -90,3 +94,42 @@ def test_modify_engine_on_restart_with_unusual_path(started_cluster): check_flags_deleted(ch1, database_name, ["replacing_ver", "collapsing_ver"]) check_tables() + + +def test_modify_engine_fails_if_zk_path_exists(started_cluster): + database_name = "zk_path" + ch1.query("CREATE DATABASE " + database_name + " ON CLUSTER cluster") + + q( + ch1, + "CREATE TABLE already_exists_1 ( A Int64, D Date, S String ) ENGINE MergeTree() PARTITION BY toYYYYMM(D) ORDER BY A;", + database_name, + ) + uuid = q( + ch1, + f"SELECT uuid FROM system.tables WHERE table = 'already_exists_1' and database = '{database_name}'", + database_name, + ).strip("'[]\n") + + q( + ch1, + f"CREATE TABLE already_exists_2 ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/\\'/{database_name}/already_exists_1/{uuid}', 'r2') PARTITION BY toYYYYMM(D) ORDER BY A;", + database_name, + ) + + set_convert_flags(ch1, database_name, ["already_exists_1"]) + + table_data_path = get_table_path(ch1, "already_exists_1", database_name) + + ch1.stop_clickhouse() + ch1.start_clickhouse(retry_start=False, expected_to_fail=True) + + # Check if we can cancel convertation + ch1.exec_in_container( + [ + "bash", + "-c", + f"rm {table_data_path}convert_to_replicated", + ] + ) + ch1.start_clickhouse() From 08a2d192c59914f861d5d7f9bbaf3ab138c52fec Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Mon, 15 Apr 2024 14:49:04 +0200 Subject: [PATCH 191/651] Fix HostResolver behavior on fail --- src/Common/HostResolvePool.cpp | 19 +++++++++++++++---- src/Common/HostResolvePool.h | 2 ++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/Common/HostResolvePool.cpp b/src/Common/HostResolvePool.cpp index 6db28edc07e..ed9a60808e9 100644 --- a/src/Common/HostResolvePool.cpp +++ b/src/Common/HostResolvePool.cpp @@ -139,6 +139,8 @@ void HostResolver::setSuccess(const Poco::Net::IPAddress & address) if (it == records.end()) return; + it->fail_count = 0; + auto old_weight = it->getWeight(); ++it->usage; auto new_weight = it->getWeight(); @@ -158,8 +160,14 @@ void HostResolver::setFail(const Poco::Net::IPAddress & address) if (it == records.end()) return; - it->failed = true; - it->fail_time = now; + while (it != records.end() && it->address == address) + { + it->failed = true; + it->fail_time = now; + if (it->fail_count < 
RECORD_FAIL_COUNT_LIMIT)
+            ++it->fail_count;
+        ++it;
+    }
 }

 ProfileEvents::increment(metrics.failed);
@@ -223,7 +231,10 @@ void HostResolver::updateImpl(Poco::Timestamp now, std::vector
@@ -141,6 +142,7 @@ protected:
         size_t usage = 0;
         bool failed = false;
         Poco::Timestamp fail_time = 0;
+        size_t fail_count = 0;

         size_t weight_prefix_sum;

From 87785e1c382a2580a95c2866d0090cbf725412c8 Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Fri, 19 Apr 2024 19:05:27 +0200
Subject: [PATCH 192/651] Set failed only for single record in HostResolver

---
 src/Common/HostResolvePool.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/Common/HostResolvePool.cpp b/src/Common/HostResolvePool.cpp
index ed9a60808e9..91f961addf0 100644
--- a/src/Common/HostResolvePool.cpp
+++ b/src/Common/HostResolvePool.cpp
@@ -160,14 +160,10 @@ void HostResolver::setFail(const Poco::Net::IPAddress & address)
         if (it == records.end())
             return;

-        while (it != records.end() && it->address == address)
-        {
-            it->failed = true;
-            it->fail_time = now;
-            if (it->fail_count < RECORD_FAIL_COUNT_LIMIT)
-                ++it->fail_count;
-            ++it;
-        }
+        it->failed = true;
+        it->fail_time = now;
+        if (it->fail_count < RECORD_FAIL_COUNT_LIMIT)
+            ++it->fail_count;
     }

     ProfileEvents::increment(metrics.failed);

From 22f1c197e5fa04a2a463b535b034663e720c6e63 Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Thu, 2 May 2024 16:10:05 +0200
Subject: [PATCH 193/651] Test for HostResolver fail_count

---
 .../test_host_resolver_fail_count/__init__.py |   0
 .../configs/config.d/cluster.xml              |  12 ++
 .../configs/config.d/s3.xml                   |  21 ++++
 .../test_case.py                              | 103 ++++++++++++++++++
 4 files changed, 136 insertions(+)
 create mode 100644 tests/integration/test_host_resolver_fail_count/__init__.py
 create mode 100644 tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml
 create mode 100644 tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml
 create mode 100644 tests/integration/test_host_resolver_fail_count/test_case.py

diff --git a/tests/integration/test_host_resolver_fail_count/__init__.py b/tests/integration/test_host_resolver_fail_count/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml b/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml
new file mode 100644
index 00000000000..bde62b82719
--- /dev/null
+++ b/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml
@@ -0,0 +1,12 @@
+
+
+
+    5
+    5
+    5
+    5
+    5
+    5
+
+
+
\ No newline at end of file
diff --git a/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml b/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml
new file mode 100644
index 00000000000..94ac83b32ac
--- /dev/null
+++ b/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml
@@ -0,0 +1,21 @@
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <s3>
+                <type>s3</type>
+                <endpoint>http://minio1:9001/root/data/</endpoint>
+                <access_key_id>minio</access_key_id>
+                <secret_access_key>minio123</secret_access_key>
+            </s3>
+        </disks>
+        <policies>
+            <s3>
+                <volumes>
+                    <main>
+                        <disk>s3</disk>
+                    </main>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+</clickhouse>
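The integration test added below exercises the per-record fail_count from patch 191 together with the exponential ban window that patch 194 later factors into Record::cleanTimeoutedFailedFlag: after N consecutive fails the failed flag survives keep_history * 2^(N - 1), which is why the test waits 2 minutes for the first recheck and expects the next retry only after a further doubling. A standalone sketch of that arithmetic, assuming plain integer seconds instead of Poco::Timespan:

#include <cstdint>
#include <cstdio>

int main()
{
    // Matches DEFAULT_RESOLVE_TIME_HISTORY_SECONDS in the test below (2 minutes).
    const uint64_t history_seconds = 2 * 60;
    for (uint64_t consecutive_fails = 1; consecutive_fails <= 4; ++consecutive_fails)
    {
        // Same shift as in cleanTimeoutedFailedFlag: keep_history * 2^(fails - 1).
        const uint64_t ban_seconds = history_seconds * (1ull << (consecutive_fails - 1));
        std::printf("consecutive fails = %llu -> failed flag kept for %llu s\n",
                    static_cast<unsigned long long>(consecutive_fails),
                    static_cast<unsigned long long>(ban_seconds));
    }
    return 0; // prints 120 s, 240 s, 480 s, 960 s
}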
diff --git a/tests/integration/test_host_resolver_fail_count/test_case.py b/tests/integration/test_host_resolver_fail_count/test_case.py
new file mode 100644
index 00000000000..895d764a268
--- /dev/null
+++ b/tests/integration/test_host_resolver_fail_count/test_case.py
@@ -0,0 +1,103 @@
+"""Test how HostResolver counts fails and bans addresses."""
+import pytest
+import time
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+
+node = cluster.add_instance(
+    "node",
+    main_configs=["configs/config.d/cluster.xml", "configs/config.d/s3.xml"],
+    with_minio=True,
+)
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+# The same value as in ClickHouse; this can't be configured via config now
+DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2*60
+
+
+def test_host_resolver(start_cluster):
+    minio_ip = cluster.get_instance_ip("minio1")
+
+    # drop DNS cache
+    node.set_hosts([
+        (minio_ip, "minio1"),
+        (node.ip_address, "minio1"),  # no answer on 9001 port on this IP
+    ])
+
+    node.query("SYSTEM DROP DNS CACHE")
+    node.query("SYSTEM DROP CONNECTIONS CACHE")
+
+    node.query("""
+        CREATE TABLE test (key UInt32, value UInt32)
+        Engine=MergeTree()
+        ORDER BY key PARTITION BY key
+        SETTINGS storage_policy='s3'
+    """)
+
+    initial_fails = "0\n"
+    k = 0
+    limit = 100
+    while initial_fails == "0\n":
+        node.query(f"""
+            INSERT INTO test VALUES (0,{k})
+        """)
+        # HostResolver chooses an IP randomly, so a single call can pick the working IP
+        initial_fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'")
+        k += 1
+        if k >= limit:
+            # The dead IP was not chosen in 100 iterations.
+            # This is not expected, but not an error actually.
+            # And the test should be stopped.
+            return
+
+    # initial_fails can be more than 1 if clickhouse does something in several parallel threads
+
+    for j in range(10):
+        for i in range(10):
+            node.query(f"""
+                INSERT INTO test VALUES ({i+1},{j+1})
+            """)
+            fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'")
+            assert fails == initial_fails
+
+    # Check that clickhouse tries to recheck the IP after 2 minutes
+    time.sleep(DEFAULT_RESOLVE_TIME_HISTORY_SECONDS)
+
+    intermediate_fails = initial_fails
+    limit = k + 100
+    while intermediate_fails == initial_fails:
+        node.query(f"""
+            INSERT INTO test VALUES (101,{k})
+        """)
+        intermediate_fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'")
+        k += 1
+        if k >= limit:
+            # The dead IP was not chosen in 100 iterations.
+            # This is not expected, but not an error actually.
+            # And the test should be stopped.
+            return
+
+    # After another 2 minutes there should be no new fails; the next retry comes after 4 minutes
+    time.sleep(DEFAULT_RESOLVE_TIME_HISTORY_SECONDS)
+
+    initial_fails = intermediate_fails
+    limit = k + 100
+    while intermediate_fails == initial_fails:
+        node.query(f"""
+            INSERT INTO test VALUES (102,{k})
+        """)
+        intermediate_fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'")
+        k += 1
+        if k >= limit:
+            break
+
+    assert k == limit

From 58c53fa50b8a3885aa387989f8e59ca8e8f9d586 Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Fri, 3 May 2024 13:27:58 +0200
Subject: [PATCH 194/651] Move some logic to HostResolver::Record methods

---
 src/Common/HostResolvePool.cpp | 13 ++++---------
 src/Common/HostResolvePool.h   | 28 ++++++++++++++++++++++++++--
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/src/Common/HostResolvePool.cpp b/src/Common/HostResolvePool.cpp
index 91f961addf0..1b783bc596f 100644
--- a/src/Common/HostResolvePool.cpp
+++ b/src/Common/HostResolvePool.cpp
@@ -139,10 +139,8 @@ void HostResolver::setSuccess(const Poco::Net::IPAddress & address)
     if (it == records.end())
         return;

-    it->fail_count = 0;
-
     auto old_weight = it->getWeight();
-    ++it->usage;
+    it->setSuccess();
     auto new_weight = it->getWeight();

     if (old_weight != new_weight)
@@ -160,10 +158,7 @@ void HostResolver::setFail(const Poco::Net::IPAddress & address)
         if (it == records.end())
             return;

-        it->failed = true;
-        it->fail_time = now;
-        if (it->fail_count < RECORD_FAIL_COUNT_LIMIT)
-            ++it->fail_count;
+        it->setFail(now);
     }

     ProfileEvents::increment(metrics.failed);
@@ -244,8 +239,7 @@ void HostResolver::updateImpl(Poco::Timestamp now, std::vector
@@ -142,7 +142,7 @@ protected:
         size_t usage = 0;
         bool failed = false;
         Poco::Timestamp fail_time = 0;
-        size_t fail_count = 0;
+        size_t consecutive_fail_count = 0;

         size_t weight_prefix_sum;

@@ -168,6 +168,29 @@ protected:
                 return 8;
             return 10;
         }
+
+        void cleanTimeoutedFailedFlag(const Poco::Timestamp & now, const Poco::Timespan & keep_history)
+        {
+            if (!failed)
+                return;
+            /// Exponentially increased time between flag cleanups
+            if (fail_time < now - Poco::Timespan(keep_history.totalSeconds() * (1ull << (consecutive_fail_count - 1)), 0))
+                failed = false;
+        }
+
+        void setFail(const Poco::Timestamp & now)
+        {
+            failed = true;
+            fail_time = now;
+            if (consecutive_fail_count < RECORD_CONSECTIVE_FAIL_COUNT_LIMIT)
+                ++consecutive_fail_count;
+        }
+
+        void setSuccess()
+        {
+            consecutive_fail_count = 0;
+            ++usage;
+        }
     };

     using Records = std::vector<Record>;
@@ -180,6 +203,7 @@ protected:
     void updateWeights() TSA_REQUIRES(mutex);
     void updateWeightsImpl() TSA_REQUIRES(mutex);
     size_t getTotalWeight() const TSA_REQUIRES(mutex);
+    Poco::Timespan getRecordHistoryTime(const Record&) const;

     const String host;
     const Poco::Timespan history;

From 61f7b95e3d4ec7711df7fadb332eabf02913ba75 Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 13 May 2024 16:04:20 +0000
Subject: [PATCH 195/651] Fix build

---
 src/DataTypes/Serializations/SerializationDynamic.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp
index d0ecc3b80a2..cb9d4a2f7bc 100644
--- a/src/DataTypes/Serializations/SerializationDynamic.cpp
+++ b/src/DataTypes/Serializations/SerializationDynamic.cpp
@@ -31,7 +31,7 @@ struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryB
     ISerialization::SerializeBinaryBulkStatePtr variant_state;

     /// Variants statistics.
Map (Variant name) -> (Variant size). - ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ }; + ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ, .data = {} }; SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} }; From f3b9a326fede69769811dc9309bfb5d00aefd874 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 13 May 2024 19:59:16 +0200 Subject: [PATCH 196/651] Fix build --- src/TableFunctions/TableFunctionObjectStorage.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index a997b34a75c..9f16a9a0b25 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -192,6 +192,15 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) #if USE_HDFS factory.registerFunction>( { + .documentation = + { + .description=R"(The table function can be used to read the data stored on HDFS virtual filesystem.)", + .examples{ + { + "hdfs", + "SELECT * FROM hdfs(url, format, compression, structure])", "" + }} + }, .allow_readonly = false }); #endif From d8856b06d873797117ca260d009d55a7632bcb58 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 13 May 2024 20:06:58 +0200 Subject: [PATCH 197/651] add unit tests, add new counter AddressesBanned --- src/Common/CurrentMetrics.cpp | 1 + src/Common/HostResolvePool.cpp | 42 ++++++-- src/Common/HostResolvePool.h | 29 ++--- src/Common/tests/gtest_resolve_pool.cpp | 135 +++++++++++++++++++++++- 4 files changed, 185 insertions(+), 22 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 0f25397a961..01dd8271459 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -285,6 +285,7 @@ M(HTTPConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for http hosts") \ \ M(AddressesActive, "Total count of addresses which are used for creation connections with connection pools") \ + M(AddressesBanned, "Total count of addresses which are banned as faulty for creation connections with connection pools") \ #ifdef APPLY_FOR_EXTERNAL_METRICS diff --git a/src/Common/HostResolvePool.cpp b/src/Common/HostResolvePool.cpp index 1b783bc596f..cad64ee7204 100644 --- a/src/Common/HostResolvePool.cpp +++ b/src/Common/HostResolvePool.cpp @@ -8,6 +8,8 @@ #include #include +#include + namespace ProfileEvents { @@ -19,6 +21,7 @@ namespace ProfileEvents namespace CurrentMetrics { extern const Metric AddressesActive; + extern const Metric AddressesBanned; } namespace DB @@ -36,6 +39,7 @@ HostResolverMetrics HostResolver::getMetrics() .expired = ProfileEvents::AddressesExpired, .failed = ProfileEvents::AddressesMarkedAsFailed, .active_count = CurrentMetrics::AddressesActive, + .banned_count = CurrentMetrics::AddressesBanned, }; } @@ -47,7 +51,7 @@ HostResolver::WeakPtr HostResolver::getWeakFromThis() HostResolver::HostResolver(String host_, Poco::Timespan history_) : host(std::move(host_)) , history(history_) - , resolve_function([](const String & host_to_resolve) { return DNSResolver::instance().resolveHostAll(host_to_resolve); }) + , resolve_function([](const String & host_to_resolve) { return DNSResolver::instance().resolveHostAllInOriginOrder(host_to_resolve); }) { update(); } @@ -62,6 +66,12 @@ HostResolver::HostResolver( HostResolver::~HostResolver() { 
std::lock_guard lock(mutex);
+
+    auto banned_count = 0;
+    for (const auto & rec: records)
+        banned_count += rec.failed;
+    CurrentMetrics::sub(metrics.banned_count, banned_count);
+
     CurrentMetrics::sub(metrics.active_count, records.size());
     records.clear();
 }
@@ -113,6 +123,7 @@ void HostResolver::updateWeights()

     if (getTotalWeight() == 0 && !records.empty())
     {
+        CurrentMetrics::sub(metrics.banned_count, records.size());
         for (auto & rec : records)
             rec.failed = false;
@@ -158,7 +169,8 @@ void HostResolver::setFail(const Poco::Net::IPAddress & address)
         if (it == records.end())
             return;

-        it->setFail(now);
+        if (it->setFail(now))
+            CurrentMetrics::add(metrics.banned_count);
     }

     ProfileEvents::increment(metrics.failed);
@@ -215,17 +227,20 @@ void HostResolver::updateImpl(Poco::Timestamp now, std::vectorfailed)
+                CurrentMetrics::sub(metrics.banned_count);
         }
         ++it_before;
     }
     else if (it_before == records.end() || (it_next != next_gen.end() && *it_next < it_before->address))
     {
-        CurrentMetrics::add(metrics.active_count, 1);
-        ProfileEvents::increment(metrics.discovered, 1);
+        /// there could be duplicates in the next_gen vector
         if (merged.empty() || merged.back().address != *it_next)
+        {
+            CurrentMetrics::add(metrics.active_count, 1);
+            ProfileEvents::increment(metrics.discovered, 1);
             merged.push_back(Record(*it_next, now));
-        else
-            merged.back().resolve_time = now;
+        }
         ++it_next;
     }
     else
@@ -239,9 +254,22 @@ void HostResolver::updateImpl(Poco::Timestamp now, std::vector
 #include
 #include
+#include "base/defines.h"
+#include
 #include
 #include
@@ -29,8 +31,9 @@ protected:
         DB::CurrentThread::getProfileEvents().reset();

         ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count));
+        ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));

-        addresses = std::set<String>{"127.0.0.1", "127.0.0.2", "127.0.0.3"};
+        addresses = std::multiset<String>{"127.0.0.1", "127.0.0.2", "127.0.0.3"};
         // Code here will be called immediately after the constructor (right
         // before each test).
     }
@@ -58,7 +61,7 @@ protected:
     }

     DB::HostResolverMetrics metrics = DB::HostResolver::getMetrics();
-    std::set<String> addresses;
+    std::multiset<String> addresses;
 };

 TEST_F(ResolvePoolTest, CanResolve)
@@ -160,7 +163,7 @@ TEST_F(ResolvePoolTest, CanMerge)
     ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]);

     auto old_addresses = addresses;
-    addresses = std::set<String>{"127.0.0.4", "127.0.0.5"};
+    addresses = std::multiset<String>{"127.0.0.4", "127.0.0.5"};

     resolver->update();
@@ -229,6 +232,7 @@ TEST_F(ResolvePoolTest, CanFail)
     ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.failed]);

     ASSERT_EQ(addresses.size(), CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
     ASSERT_EQ(addresses.size(), DB::CurrentThread::getProfileEvents()[metrics.discovered]);

     for (size_t i = 0; i < 1000; ++i)
@@ -243,15 +247,20 @@ TEST_F(ResolvePoolTest, CanFail)
 TEST_F(ResolvePoolTest, CanFailAndHeal)
 {
     auto resolver = make_resolver();
+    ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));

     auto failed_addr = resolver->resolve();
     failed_addr.setFail();
+    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));

     while (true)
     {
         auto next_addr = resolver->resolve();
         if (*failed_addr == *next_addr)
+        {
+            ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
             break;
+        }
     }
 }

@@ -277,3 +286,123 @@ TEST_F(ResolvePoolTest, CanExpire)
     ASSERT_EQ(addresses.size() + 1, DB::CurrentThread::getProfileEvents()[metrics.discovered]);
     ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]);
 }
+
+
+TEST_F(ResolvePoolTest, DuplicatesInAddresses)
+{
+    auto resolver = make_resolver();
+
+    size_t unique_addresses = addresses.size();
+
+    ASSERT_EQ(3, unique_addresses);
+    ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.discovered]);
+
+    ASSERT_TRUE(!addresses.empty());
+    addresses.insert(*addresses.begin());
+    addresses.insert(*addresses.begin());
+
+    size_t total_addresses = addresses.size();
+
+    ASSERT_EQ(addresses.count(*addresses.begin()), 3);
+    ASSERT_EQ(unique_addresses + 2, total_addresses);
+
+    resolver->update();
+    ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.discovered]);
+}
+
+void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses, auto & failed_addr, auto & metrics)
+{
+    ASSERT_EQ(iteration, DB::CurrentThread::getProfileEvents()[metrics.failed]);
+    for (size_t i = 0; i < 100; ++i)
+    {
+        auto next_addr = resolver->resolve();
+        ASSERT_TRUE(addresses.contains(*next_addr));
+        ASSERT_NE(*next_addr, *failed_addr);
+    }
+}
+
+TEST_F(ResolvePoolTest, BannedForConsecutiveFail)
+{
+    size_t history_ms = 5;
+    auto resolver = make_resolver(history_ms);
+
+
+    auto failed_addr = resolver->resolve();
+    ASSERT_TRUE(addresses.contains(*failed_addr));
+
+    failed_addr.setFail();
+    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
+    check_no_failed_address(1, resolver, addresses, failed_addr, metrics);
+
+    sleepForMilliseconds(history_ms + 1);
+    resolver->update();
+    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
+
+    failed_addr.setFail();
+    check_no_failed_address(2, resolver, addresses, failed_addr, metrics);
+
+    sleepForMilliseconds(history_ms + 1);
+    resolver->update();
+    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
+
+    // The IP is still banned after history_ms + update, because this was its second consecutive fail
+    check_no_failed_address(2, resolver, addresses, failed_addr, metrics);
+}
+
+TEST_F(ResolvePoolTest, NoAdditionalBanForConcurrentFail)
+{
+    size_t history_ms = 5;
+    auto resolver = make_resolver(history_ms);
+
+    auto failed_addr = resolver->resolve();
+    ASSERT_TRUE(addresses.contains(*failed_addr));
+
+    failed_addr.setFail();
+    failed_addr.setFail();
+    failed_addr.setFail();
+
+    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
+    check_no_failed_address(3, resolver, addresses, failed_addr, metrics);
+
+    sleepForMilliseconds(history_ms + 1);
+    resolver->update();
+    // The IP is cleared after just one history_ms interval.
+    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
+}
+
+TEST_F(ResolvePoolTest, StillBannedAfterSuccess)
+{
+    size_t history_ms = 5;
+    auto resolver = make_resolver(history_ms);
+
+    auto failed_addr = resolver->resolve();
+    ASSERT_TRUE(addresses.contains(*failed_addr));
+
+    std::optional<decltype(resolver->resolve())> again_addr;
+    while (true)
+    {
+        auto addr = resolver->resolve();
+        if (*addr == *failed_addr)
+        {
+            again_addr.emplace(std::move(addr));
+            break;
+        }
+    }
+    chassert(again_addr);
+
+    failed_addr.setFail();
+
+    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
+    check_no_failed_address(1, resolver, addresses, failed_addr, metrics);
+
+    again_addr = std::nullopt; // success
+
+    ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
+    ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
+}

From 6ec868a3df1dac8f905a9df00e609743eb0c3346 Mon Sep 17 00:00:00 2001
From: divanik
Date: Mon, 13 May 2024 20:04:21 +0000
Subject: [PATCH 198/651] Add schema inference test

---
 ...6_schema_inference_cache_s3_archives.reference | 14 ++++++++++++++
 .../03036_schema_inference_cache_s3_archives.sql  |  6 ++++++
 .../0_stateless/data_minio/03036_json_archive.zip | Bin 0 -> 418 bytes
 3 files changed, 20 insertions(+)
 create mode 100644 tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.reference
 create mode 100644 tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql
 create mode 100644 tests/queries/0_stateless/data_minio/03036_json_archive.zip

diff --git a/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.reference b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.reference
new file mode 100644
index 00000000000..8bee9a685e3
--- /dev/null
+++ b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.reference
@@ -0,0 +1,14 @@
+1 Str1
+2 Str2
+3 Str3
+4 Str4
+DEFAULT 03036_archive1.zip::example1.csv id Nullable(Int64), data Nullable(String)
+DEFAULT 03036_archive1.zip::example2.csv \N
+21 Str21
+22 Str22
+23 Str23
+24 Str24
+DEFAULT 03036_archive1.zip::example1.csv id Nullable(Int64), data Nullable(String)
+DEFAULT 03036_archive1.zip::example2.csv \N
+UNION 03036_json_archive.zip::example11.jsonl id Nullable(Int64), data Nullable(String)
+UNION 03036_json_archive.zip::example12.jsonl id Nullable(Int64), data Nullable(String)
diff --git a/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql
new file mode 100644
index 00000000000..bfa50f1ebe1
--- /dev/null
+++ b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql
@@ -0,0 +1,6 @@
+select * from s3(s3_conn, filename='03036_archive1.zip :: example{1,2}.csv') order by tuple(*);
+select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache order by file; + +set schema_inference_mode = 'union'; +select * from s3(s3_conn, filename='03036_json_archive.zip :: example{11,12}.jsonl') order by tuple(*); +select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache order by file; \ No newline at end of file diff --git a/tests/queries/0_stateless/data_minio/03036_json_archive.zip b/tests/queries/0_stateless/data_minio/03036_json_archive.zip new file mode 100644 index 0000000000000000000000000000000000000000..31aa2c168b2f79142f92f178e09bd2124e5c5fe7 GIT binary patch literal 418 zcmWIWW@Zs#U|`^2aG1X~!ctkuSOdtj17dy#8HUt~#N2|MR6|3(tm6ErLy&>dmB0WmPaRK|!YNJOT|GQgvr0uCTylGH z$<1r)Muq^mg&sf)U#wVAj?F?NBnz(tExU%-!fPlN_CPFDdvQq(Vj&}w95XIIN&tPz zz`zK^TN*(u3?H*Xe2nJX0B?k$nEpUEv=GTqxUYbQf_#NxC@UMt4NO3|3P?W!aToyi C?rYEh literal 0 HcmV?d00001 From 97a5de96533048bb3503c1e3e8ad52416f2c8072 Mon Sep 17 00:00:00 2001 From: qiangxuhui Date: Fri, 15 Mar 2024 03:11:24 +0000 Subject: [PATCH 199/651] Initial support for loongarch64 Make ClickHouse compilable and runnable on loongarch64 So far only basic functionality was tested (on real hw), clickhouse server runs, exceptions works, client works, simple tests works. --- .github/workflows/master.yml | 12 +- PreLoad.cmake | 2 + base/base/defines.h | 4 +- base/poco/Foundation/include/Poco/Platform.h | 3 + cmake/arch.cmake | 2 + cmake/linux/toolchain-loongarch64.cmake | 23 ++ cmake/target.cmake | 15 + contrib/boost-cmake/CMakeLists.txt | 6 + contrib/librdkafka-cmake/config.h.in | 2 +- contrib/libssh-cmake/CMakeLists.txt | 2 + .../libssh-cmake/linux/loongarch64/config.h | 287 ++++++++++++++++++ contrib/libunwind | 2 +- contrib/rocksdb | 2 +- docs/en/development/build-cross-loongarch.md | 32 ++ docs/en/development/contrib.md | 2 +- docs/en/development/developer-instruction.md | 2 +- docs/en/development/style.md | 2 +- docs/en/development/tests.md | 2 +- docs/ru/development/build-cross-loongarch.mdx | 10 + docs/ru/development/style.md | 2 +- src/Common/StackTrace.cpp | 2 + src/Common/ThreadFuzzer.cpp | 5 +- src/Common/atomicRename.cpp | 2 + src/Common/waitForPid.cpp | 2 + src/Compression/LZ4_decompress_faster.cpp | 4 +- src/Disks/IO/ThreadPoolReader.cpp | 2 + .../aspell-ignore/en/aspell-dict.txt | 2 + 27 files changed, 418 insertions(+), 15 deletions(-) create mode 100644 cmake/linux/toolchain-loongarch64.cmake create mode 100644 contrib/libssh-cmake/linux/loongarch64/config.h create mode 100644 docs/en/development/build-cross-loongarch.md create mode 100644 docs/ru/development/build-cross-loongarch.mdx diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 125096209df..3ff9bc5693a 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -240,11 +240,21 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinS390X: + # disabled because s390x refused to build in the migration to OpenSSL + # BuilderBinS390X: + # needs: [RunConfig, BuilderDebRelease] + # if: ${{ !failure() && !cancelled() }} + # uses: ./.github/workflows/reusable_build.yml + # with: + # build_name: binary_s390x + # data: ${{ needs.RunConfig.outputs.data }} + # checkout_depth: 0 + BuilderBinLOONGARCH64: needs: [RunConfig, BuilderDebRelease] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml with: - build_name: binary_s390x + build_name: binary_loongarch64 data: ${{ needs.RunConfig.outputs.data }} 
checkout_depth: 0 ############################################################################################ diff --git a/PreLoad.cmake b/PreLoad.cmake index 4879e721ae3..e0fd37b2fd6 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -93,6 +93,8 @@ if (OS MATCHES "Linux" set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-ppc64le.cmake" CACHE INTERNAL "") elseif (ARCH MATCHES "^(s390x.*|S390X.*)") set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-s390x.cmake" CACHE INTERNAL "") + elseif (ARCH MATCHES "^(loongarch64.*|LOONGARCH64.*)") + set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-loongarch64.cmake" CACHE INTERNAL "") else () message (FATAL_ERROR "Unsupported architecture: ${ARCH}") endif () diff --git a/base/base/defines.h b/base/base/defines.h index 627c50c27d2..2fc54c37bde 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -28,8 +28,8 @@ #define NO_INLINE __attribute__((__noinline__)) #define MAY_ALIAS __attribute__((__may_alias__)) -#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) && !defined(__s390x__) && !(defined(__riscv) && (__riscv_xlen == 64)) -# error "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress), s390x (work in progress) and RISC-V 64 (experimental)" +#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) && !defined(__s390x__) && !(defined(__loongarch64)) && !(defined(__riscv) && (__riscv_xlen == 64)) +# error "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress), s390x (work in progress), loongarch64 (experimental) and RISC-V 64 (experimental)" #endif /// Check for presence of address sanitizer diff --git a/base/poco/Foundation/include/Poco/Platform.h b/base/poco/Foundation/include/Poco/Platform.h index fe45833aea6..2668d7d40a4 100644 --- a/base/poco/Foundation/include/Poco/Platform.h +++ b/base/poco/Foundation/include/Poco/Platform.h @@ -120,6 +120,7 @@ #define POCO_ARCH_AARCH64 0x0f #define POCO_ARCH_ARM64 0x0f // same as POCO_ARCH_AARCH64 #define POCO_ARCH_RISCV64 0x10 +#define POCO_ARCH_LOONGARCH64 0x12 #if defined(__ALPHA) || defined(__alpha) || defined(__alpha__) || defined(_M_ALPHA) @@ -209,6 +210,8 @@ #elif defined(__riscv) && (__riscv_xlen == 64) # define POCO_ARCH POCO_ARCH_RISCV64 # define POCO_ARCH_LITTLE_ENDIAN 1 +#elif defined(__loongarch64) +# define POCO_ARCH POCO_ARCH_LOONGARCH64 #endif diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 5ec05e49e3c..c992236dc95 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -11,6 +11,8 @@ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x.*|S390X.*)") set (ARCH_S390X 1) elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") set (ARCH_RISCV64 1) +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch64") + set (ARCH_LOONGARCH64 1) else () message (FATAL_ERROR "Platform ${CMAKE_SYSTEM_PROCESSOR} is not supported") endif () diff --git a/cmake/linux/toolchain-loongarch64.cmake b/cmake/linux/toolchain-loongarch64.cmake new file mode 100644 index 00000000000..bf1ca261183 --- /dev/null +++ b/cmake/linux/toolchain-loongarch64.cmake @@ -0,0 +1,23 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. 
+include_guard(GLOBAL) + +set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set (CMAKE_SYSTEM_NAME "Linux") +set (CMAKE_SYSTEM_PROCESSOR "loongarch64") +set (CMAKE_C_COMPILER_TARGET "loongarch64-linux-gnu") +set (CMAKE_CXX_COMPILER_TARGET "loongarch64-linux-gnu") +set (CMAKE_ASM_COMPILER_TARGET "loongarch64-linux-gnu") + +# Adding `-mcmodel=extreme` is to handle the link error: +# relocation R_LARCH_B26 out of range: 194148892 is not in [-134217728, 134217727] +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcmodel=extreme") +set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -mcmodel=extreme") + +set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-loongarch64") + +set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-loongarch64/usr") + +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") diff --git a/cmake/target.cmake b/cmake/target.cmake index fb911ace7b5..52d67725edf 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -61,6 +61,21 @@ if (CMAKE_CROSSCOMPILING) elseif (ARCH_S390X) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_RUST OFF CACHE INTERNAL "") + elseif (ARCH_LOONGARCH64) + set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") + set (ENABLE_LDAP OFF CACHE INTERNAL "") + set (OPENSSL_NO_ASM ON CACHE INTERNAL "") + set (ENABLE_JEMALLOC OFF CACHE INTERNAL "") + set (ENABLE_PARQUET OFF CACHE INTERNAL "") + set (ENABLE_GRPC OFF CACHE INTERNAL "") + set (ENABLE_HDFS OFF CACHE INTERNAL "") + set (ENABLE_MYSQL OFF CACHE INTERNAL "") + set (ENABLE_RUST OFF CACHE INTERNAL "") + set (ENABLE_LIBPQXX OFF CACHE INTERNAL "") + set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") + set (ENABLE_DWARF_PARSER OFF CACHE INTERNAL "") + set (ENABLE_BLAKE3 OFF CACHE INTERNAL "") + set (COMPILER_CACHE disabled CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 2c60fc0e552..7191393533b 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -140,6 +140,12 @@ elseif (ARCH_RISCV64) "${LIBRARY_DIR}/libs/context/src/asm/make_riscv64_sysv_elf_gas.S" "${LIBRARY_DIR}/libs/context/src/asm/ontop_riscv64_sysv_elf_gas.S" ) +elseif (ARCH_LOONGARCH64) + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_loongarch64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_loongarch64_sysv_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_loongarch64_sysv_elf_gas.S" + ) elseif(OS_DARWIN) set (SRCS_CONTEXT ${SRCS_CONTEXT} "${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_macho_gas.S" diff --git a/contrib/librdkafka-cmake/config.h.in b/contrib/librdkafka-cmake/config.h.in index 52ae70aeea8..f6ec3bc0e79 100644 --- a/contrib/librdkafka-cmake/config.h.in +++ b/contrib/librdkafka-cmake/config.h.in @@ -66,7 +66,7 @@ #cmakedefine WITH_SASL_OAUTHBEARER 1 #cmakedefine WITH_SASL_CYRUS 1 // crc32chw -#if !defined(__PPC__) && !defined(__riscv) && !defined(__aarch64__) && !defined(__s390x__) +#if !defined(__PPC__) && !defined(__riscv) && !defined(__aarch64__) && !defined(__s390x__) && !defined(__loongarch64) #define WITH_CRC32C_HW 1 #endif // regex diff --git a/contrib/libssh-cmake/CMakeLists.txt b/contrib/libssh-cmake/CMakeLists.txt index bd051195864..4cc3e2a1831 100644 --- a/contrib/libssh-cmake/CMakeLists.txt +++ 
b/contrib/libssh-cmake/CMakeLists.txt @@ -110,6 +110,8 @@ if (OS_LINUX) target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/s390x") elseif (ARCH_RISCV64) target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/riscv64") + elseif (ARCH_LOONGARCH64) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/loongarch64") else () message(FATAL_ERROR "Platform is not supported") endif () diff --git a/contrib/libssh-cmake/linux/loongarch64/config.h b/contrib/libssh-cmake/linux/loongarch64/config.h new file mode 100644 index 00000000000..aa684ca29a3 --- /dev/null +++ b/contrib/libssh-cmake/linux/loongarch64/config.h @@ -0,0 +1,287 @@ +/* Name of package */ +#define PACKAGE "libssh" + +/* Version number of package */ +#define VERSION "0.9.7" + +#define SYSCONFDIR "etc" +#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/loongarch64" +#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse" + +/* Global bind configuration file path */ +#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config" + +/* Global client configuration file path */ +#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config" + +/************************** HEADER FILES *************************/ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ARGP_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_VALGRIND_VALGRIND_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PTY_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_UTMP_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_UTIL_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBUTIL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_UTIME_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_IO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_TERMIOS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_AES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WSPIAPI_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_OPENSSL_BLOWFISH_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_DES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_ECDH_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_EC_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_OPENSSL_ECDSA_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have eliptic curve cryptography in openssl */ +#define HAVE_OPENSSL_ECC 1 + +/* Define to 1 if you have eliptic curve cryptography in gcrypt */ +/* #undef HAVE_GCRYPT_ECC */ + +/* Define to 1 if you have eliptic curve cryptography */ +#define HAVE_ECC 1 + +/* Define to 1 if you have DSA */ +/* #undef HAVE_DSA */ + +/* Define to 1 if you have gl_flags as a glob_t sturct member */ +#define HAVE_GLOB_GL_FLAGS_MEMBER 1 + +/* Define to 1 if you have OpenSSL with Ed25519 support */ +#define HAVE_OPENSSL_ED25519 1 + +/* Define to 1 if you have OpenSSL with X25519 support */ +#define HAVE_OPENSSL_X25519 1 + +/*************************** FUNCTIONS ***************************/ + +/* Define to 1 if you have the `EVP_aes128_ctr' function. */ +#define HAVE_OPENSSL_EVP_AES_CTR 1 + +/* Define to 1 if you have the `EVP_aes128_cbc' function. */ +#define HAVE_OPENSSL_EVP_AES_CBC 1 + +/* Define to 1 if you have the `EVP_aes128_gcm' function. */ +/* #undef HAVE_OPENSSL_EVP_AES_GCM */ + +/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */ +#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1 + +/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */ +#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1 + +/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */ +#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1 + +/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */ +/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */ + +/* Define to 1 if you have the `FIPS_mode' function. */ +#if USE_BORINGSSL +#define HAVE_OPENSSL_FIPS_MODE 1 +#endif + +/* Define to 1 if you have the `EVP_DigestSign' function. */ +#define HAVE_OPENSSL_EVP_DIGESTSIGN 1 + +/* Define to 1 if you have the `EVP_DigestVerify' function. */ +#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1 + +/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */ +/* #undef HAVE_OPENSSL_IA32CAP_LOC */ + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the `_snprintf' function. */ +/* #undef HAVE__SNPRINTF */ + +/* Define to 1 if you have the `_snprintf_s' function. */ +/* #undef HAVE__SNPRINTF_S */ + +/* Define to 1 if you have the `vsnprintf' function. */ +#define HAVE_VSNPRINTF 1 + +/* Define to 1 if you have the `_vsnprintf' function. */ +/* #undef HAVE__VSNPRINTF */ + +/* Define to 1 if you have the `_vsnprintf_s' function. */ +/* #undef HAVE__VSNPRINTF_S */ + +/* Define to 1 if you have the `isblank' function. */ +#define HAVE_ISBLANK 1 + +/* Define to 1 if you have the `strncpy' function. */ +#define HAVE_STRNCPY 1 + +/* Define to 1 if you have the `strndup' function. */ +#define HAVE_STRNDUP 1 + +/* Define to 1 if you have the `cfmakeraw' function. */ +/* #undef HAVE_CFMAKERAW */ + +/* Define to 1 if you have the `getaddrinfo' function. */ +#define HAVE_GETADDRINFO 1 + +/* Define to 1 if you have the `poll' function. */ +#define HAVE_POLL 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `clock_gettime' function. */ +/* #undef HAVE_CLOCK_GETTIME */ + +/* Define to 1 if you have the `ntohll' function. */ +/* #undef HAVE_NTOHLL */ + +/* Define to 1 if you have the `htonll' function. */ +/* #undef HAVE_HTONLL */ + +/* Define to 1 if you have the `strtoull' function. */ +#define HAVE_STRTOULL 1 + +/* Define to 1 if you have the `__strtoull' function. */ +/* #undef HAVE___STRTOULL */ + +/* Define to 1 if you have the `_strtoui64' function. 
*/ +/* #undef HAVE__STRTOUI64 */ + +/* Define to 1 if you have the `glob' function. */ +#define HAVE_GLOB 1 + +/* Define to 1 if you have the `explicit_bzero' function. */ +/* #undef HAVE_EXPLICIT_BZERO 1 */ + +/* Define to 1 if you have the `memset_s' function. */ +/* #undef HAVE_MEMSET_S */ + +/* Define to 1 if you have the `SecureZeroMemory' function. */ +/* #undef HAVE_SECURE_ZERO_MEMORY */ + +/* Define to 1 if you have the `cmocka_set_test_filter' function. */ +/* #undef HAVE_CMOCKA_SET_TEST_FILTER */ + +/*************************** LIBRARIES ***************************/ + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +#define HAVE_LIBCRYPTO 1 + +/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */ +/* #undef HAVE_LIBGCRYPT */ + +/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */ +/* #undef HAVE_LIBMBEDCRYPTO */ + +/* Define to 1 if you have the `pthread' library (-lpthread). */ +#define HAVE_PTHREAD 1 + +/* Define to 1 if you have the `cmocka' library (-lcmocka). */ +/* #undef HAVE_CMOCKA */ + +/**************************** OPTIONS ****************************/ + +#define HAVE_GCC_THREAD_LOCAL_STORAGE 1 +/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */ + +#define HAVE_FALLTHROUGH_ATTRIBUTE 1 +#define HAVE_UNUSED_ATTRIBUTE 1 + +#define HAVE_CONSTRUCTOR_ATTRIBUTE 1 +#define HAVE_DESTRUCTOR_ATTRIBUTE 1 + +#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1 + +#define HAVE_COMPILER__FUNC__ 1 +#define HAVE_COMPILER__FUNCTION__ 1 + +/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */ + +/* Define to 1 if you want to enable GSSAPI */ +/* #undef WITH_GSSAPI */ + +/* Define to 1 if you want to enable ZLIB */ +/* #undef WITH_ZLIB */ + +/* Define to 1 if you want to enable SFTP */ +/* #undef WITH_SFTP */ + +/* Define to 1 if you want to enable server support */ +#define WITH_SERVER 1 + +/* Define to 1 if you want to enable DH group exchange algorithms */ +/* #undef WITH_GEX */ + +/* Define to 1 if you want to enable blowfish cipher support */ +/* #undef WITH_BLOWFISH_CIPHER */ + +/* Define to 1 if you want to enable debug output for crypto functions */ +/* #undef DEBUG_CRYPTO */ + +/* Define to 1 if you want to enable debug output for packet functions */ +/* #undef DEBUG_PACKET */ + +/* Define to 1 if you want to enable pcap output support (experimental) */ +/* #undef WITH_PCAP */ + +/* Define to 1 if you want to enable calltrace debug output */ +/* #undef DEBUG_CALLTRACE */ + +/* Define to 1 if you want to enable NaCl support */ +/* #undef WITH_NACL */ + +/*************************** ENDIAN *****************************/ + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). 
*/ +/* #undef WORDS_BIGENDIAN */ diff --git a/contrib/libunwind b/contrib/libunwind index 40d8eadf96b..fa519913b9c 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 40d8eadf96b127d9b22d53ce7a4fc52aaedea965 +Subproject commit fa519913b9c157663824bee95c5737669a40eaf3 diff --git a/contrib/rocksdb b/contrib/rocksdb index dead55e60b8..3a0b80ca9d6 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit dead55e60b873d5f70f0e9458fbbba2b2180f430 +Subproject commit 3a0b80ca9d6eebb38fad7ea3f41dfc9db4f6a984 diff --git a/docs/en/development/build-cross-loongarch.md b/docs/en/development/build-cross-loongarch.md new file mode 100644 index 00000000000..137b6969fd6 --- /dev/null +++ b/docs/en/development/build-cross-loongarch.md @@ -0,0 +1,32 @@ +--- +slug: /en/development/build-cross-loongarch +sidebar_position: 70 +title: How to Build ClickHouse on Linux for LOONGARCH64 Architecture +sidebar_label: Build on Linux for LOONGARCH64 +--- + +As of writing (2024/03/15), building for LoongArch is considered highly experimental. Not all features can be enabled. + +This is for the case when you have a Linux machine and want to use it to build the `clickhouse` binary that will run on another Linux machine with the LOONGARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. + +The cross-build for LOONGARCH64 is based on the [Build instructions](../development/build.md); follow them first. + +## Install Clang-18 + +Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup, or run ``` sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` + +## Build ClickHouse {#build-clickhouse} + + +The LLVM version used for building must be 18.1.0 or newer. ``` bash cd ClickHouse mkdir build-loongarch64 CC=clang-18 CXX=clang++-18 cmake . -Bbuild-loongarch64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-loongarch64.cmake ninja -C build-loongarch64 ``` + +The resulting binary will run only on Linux with the LOONGARCH64 CPU architecture. diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index bbc5fbeebcb..5f96466bbec 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -1,6 +1,6 @@ --- slug: /en/development/contrib -sidebar_position: 72 +sidebar_position: 73 sidebar_label: Third-Party Libraries description: A list of third-party libraries used --- diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 772d1f97590..a66be04edd6 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -72,7 +72,7 @@ You can also add original ClickHouse repo address to your local repository to pu After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`. :::note -Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md) and so on. +Instructions below assume you are building on Linux. 
If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md), [on Linux for Linux/LOONGARCH](build-cross-loongarch.md) and so on. ::: ## Build System {#build-system} diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 77a550f2a0e..d201bbb0d3c 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -1,6 +1,6 @@ --- slug: /en/development/style -sidebar_position: 70 +sidebar_position: 71 sidebar_label: C++ Guide description: A list of recommendations regarding coding style, naming convention, formatting and more --- diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index efbce54d44b..bbc7dac0a2a 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -1,6 +1,6 @@ --- slug: /en/development/tests -sidebar_position: 71 +sidebar_position: 72 sidebar_label: Testing title: ClickHouse Testing description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. diff --git a/docs/ru/development/build-cross-loongarch.mdx b/docs/ru/development/build-cross-loongarch.mdx new file mode 100644 index 00000000000..31514f7439f --- /dev/null +++ b/docs/ru/development/build-cross-loongarch.mdx @@ -0,0 +1,10 @@ +--- +slug: /ru/development/build-cross-loongarch +sidebar_position: 69 +sidebar_label: Build on Linux for LOONGARCH64 +title: Build on Linux for LOONGARCH64 +--- + +import Content from '@site/docs/en/development/build-cross-loongarch.md'; + +<Content /> diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index cd1297504af..4aa2073d75b 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -1,6 +1,6 @@ --- slug: /ru/development/style -sidebar_position: 69 +sidebar_position: 70 sidebar_label: "Как писать код на C++" --- diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 4200161f8e8..6e6f5b42b36 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -210,6 +210,8 @@ static void * getCallerAddress(const ucontext_t & context) return reinterpret_cast<void *>(context.uc_mcontext.__gregs[REG_PC]); #elif defined(__s390x__) return reinterpret_cast<void *>(context.uc_mcontext.psw.addr); +#elif defined(__loongarch64) + return reinterpret_cast<void *>(context.uc_mcontext.__pc); #else return nullptr; #endif diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index d1e252a8184..3438ddc444d 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -361,7 +361,7 @@ void ThreadFuzzer::setup() const /// Starting from glibc 2.34 there are no internal symbols without version, /// so not __pthread_mutex_lock but __pthread_mutex_lock@2.2.5 -#if defined(OS_LINUX) and !defined(USE_MUSL) +#if defined(OS_LINUX) and !defined(USE_MUSL) and !defined(__loongarch64) /// You can get version from glibc/sysdeps/unix/sysv/linux/$ARCH/$BITS_OR_BYTE_ORDER/libc.abilist #if defined(__amd64__) # define GLIBC_SYMVER "GLIBC_2.2.5" @@ -383,7 +383,8 @@ void ThreadFuzzer::setup() const GLIBC_COMPAT_SYMBOL(__pthread_mutex_lock) #endif -#if defined(ADDRESS_SANITIZER) +/// The glibc version on loongarch64 is 2.36 +#if defined(ADDRESS_SANITIZER) || defined(__loongarch64) #if USE_JEMALLOC #error "ASan cannot be used with jemalloc" #endif diff --git 
a/src/Common/atomicRename.cpp b/src/Common/atomicRename.cpp index 44e02995858..4acdff5f66c 100644 --- a/src/Common/atomicRename.cpp +++ b/src/Common/atomicRename.cpp @@ -46,6 +46,8 @@ namespace ErrorCodes #define __NR_renameat2 357 #elif defined(__riscv) #define __NR_renameat2 276 + #elif defined(__loongarch64) + #define __NR_renameat2 276 #else #error "Unsupported architecture" #endif diff --git a/src/Common/waitForPid.cpp b/src/Common/waitForPid.cpp index e9f6c380086..16c447245a2 100644 --- a/src/Common/waitForPid.cpp +++ b/src/Common/waitForPid.cpp @@ -47,6 +47,8 @@ enum PollPidResult #define SYS_pidfd_open 434 #elif defined(__s390x__) #define SYS_pidfd_open 434 + #elif defined(__loongarch64) + #define SYS_pidfd_open 434 #else #error "Unsupported architecture" #endif diff --git a/src/Compression/LZ4_decompress_faster.cpp b/src/Compression/LZ4_decompress_faster.cpp index b548feed848..c5bc6eb67d5 100644 --- a/src/Compression/LZ4_decompress_faster.cpp +++ b/src/Compression/LZ4_decompress_faster.cpp @@ -79,7 +79,7 @@ inline void copyOverlap8(UInt8 * op, const UInt8 *& match, size_t offset) } -#if defined(__x86_64__) || defined(__PPC__) || defined(__s390x__) || defined(__riscv) +#if defined(__x86_64__) || defined(__PPC__) || defined(__s390x__) || defined(__riscv) || defined(__loongarch64) /** We use 'xmm' (128bit SSE) registers here to shuffle 16 bytes. * @@ -268,7 +268,7 @@ inline void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset) } -#if defined(__x86_64__) || defined(__PPC__) || defined(__s390x__) || defined (__riscv) +#if defined(__x86_64__) || defined(__PPC__) || defined(__s390x__) || defined (__riscv) || defined(__loongarch64) inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset) { diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index 4713e20ccc8..da31388a8ea 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -36,6 +36,8 @@ #define SYS_preadv2 380 #elif defined(__riscv) #define SYS_preadv2 286 + #elif defined(__loongarch64) + #define SYS_preadv2 286 #else #error "Unsupported architecture" #endif diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 996f7da234a..2a335cfc55a 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -475,6 +475,7 @@ LLDB LLVM's LOCALTIME LOCALTIMESTAMP +LOONGARCH LONGLONG Levenshtein Liao @@ -1910,6 +1911,7 @@ logTrace logagent loghouse london +loongarch lookups lowcardinality lowerUTF From eb96f19e48b90c863989b47d4f08763d95b8d234 Mon Sep 17 00:00:00 2001 From: qiangxuhui Date: Thu, 18 Apr 2024 12:34:06 +0000 Subject: [PATCH 200/651] Add support for building with openssl for loongarch64 --- contrib/openssl-cmake/CMakeLists.txt | 28 + .../include/crypto/bn_conf.h | 29 + .../include/crypto/dso_conf.h | 19 + .../include/internal/param_names.h | 376 +++ .../linux_loongarch64/include/openssl/asn1.h | 1133 +++++++ .../linux_loongarch64/include/openssl/asn1t.h | 946 ++++++ .../linux_loongarch64/include/openssl/bio.h | 1010 ++++++ .../linux_loongarch64/include/openssl/cmp.h | 629 ++++ .../linux_loongarch64/include/openssl/cms.h | 508 +++ .../linux_loongarch64/include/openssl/conf.h | 214 ++ .../include/openssl/configuration.h | 158 + .../include/openssl/core_names.h | 475 +++ .../linux_loongarch64/include/openssl/crmf.h | 229 ++ .../include/openssl/crypto.h | 561 ++++ 
.../linux_loongarch64/include/openssl/ct.h | 573 ++++ .../linux_loongarch64/include/openssl/err.h | 511 +++ .../linux_loongarch64/include/openssl/ess.h | 128 + .../include/openssl/fipskey.h | 36 + .../linux_loongarch64/include/openssl/lhash.h | 331 ++ .../linux_loongarch64/include/openssl/ocsp.h | 483 +++ .../include/openssl/opensslv.h | 114 + .../include/openssl/pkcs12.h | 363 +++ .../linux_loongarch64/include/openssl/pkcs7.h | 430 +++ .../include/openssl/safestack.h | 297 ++ .../linux_loongarch64/include/openssl/srp.h | 285 ++ .../linux_loongarch64/include/openssl/ssl.h | 2765 +++++++++++++++++ .../linux_loongarch64/include/openssl/ui.h | 407 +++ .../linux_loongarch64/include/openssl/x509.h | 1286 ++++++++ .../include/openssl/x509_vfy.h | 901 ++++++ .../include/openssl/x509v3.h | 1454 +++++++++ .../include/prov/der_digests.h | 160 + .../linux_loongarch64/include/prov/der_dsa.h | 94 + .../linux_loongarch64/include/prov/der_ec.h | 286 ++ .../linux_loongarch64/include/prov/der_ecx.h | 50 + .../linux_loongarch64/include/prov/der_rsa.h | 187 ++ .../linux_loongarch64/include/prov/der_sm2.h | 37 + .../linux_loongarch64/include/prov/der_wrap.h | 46 + .../include_private/buildinf.h | 30 + .../linux_loongarch64/params_idx.c | 2710 ++++++++++++++++ 39 files changed, 20279 insertions(+) create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/crypto/bn_conf.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/crypto/dso_conf.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/internal/param_names.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1t.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/bio.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/cms.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/conf.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/configuration.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/core_names.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/crmf.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/crypto.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/ct.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/err.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/ess.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/fipskey.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/lhash.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/ocsp.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs12.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs7.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/safestack.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/srp.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/ssl.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/ui.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/x509.h create 
mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/x509_vfy.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/openssl/x509v3.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/prov/der_digests.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/prov/der_dsa.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/prov/der_ec.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/prov/der_ecx.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/prov/der_rsa.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/prov/der_sm2.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include/prov/der_wrap.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h create mode 100644 contrib/openssl-cmake/linux_loongarch64/params_idx.c diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt index 72846143b9e..f5c3df1a7bc 100644 --- a/contrib/openssl-cmake/CMakeLists.txt +++ b/contrib/openssl-cmake/CMakeLists.txt @@ -61,6 +61,9 @@ elseif(ARCH_S390X) elseif(ARCH_RISCV64) set(PLATFORM_DIRECTORY linux_riscv64) add_definitions(-DOPENSSL_CPUID_OBJ -DL_ENDIAN) +elseif(ARCH_LOONGARCH64) + set(PLATFORM_DIRECTORY linux_loongarch64) + add_definitions(-DOPENSSL_CPUID_OBJ -DL_ENDIAN) endif() file(STRINGS "${PLATFORM_DIRECTORY}/include/openssl/opensslv.h" OPENSSL_VERSION_STR @@ -204,6 +207,13 @@ elseif(ARCH_RISCV64) perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/riscv64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/riscv64cpuid.S) perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aes-riscv64-zkn.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aes-riscv64-zkn.S) perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/ghash-riscv64.pl ${OPENSSL_BINARY_DIR}/crypto/modes/ghash-riscv64.S) +elseif(ARCH_LOONGARCH64) + macro(perl_generate_asm FILE_IN FILE_OUT) + add_custom_command(OUTPUT ${FILE_OUT} + COMMAND /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) + endmacro() + + perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/loongarch64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/loongarch64cpuid.S) endif() set(CRYPTO_SRC @@ -1324,6 +1334,24 @@ elseif(ARCH_RISCV64) ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c ${OPENSSL_SOURCE_DIR}/crypto/riscvcap.c ) +elseif(ARCH_LOONGARCH64) + set(CRYPTO_SRC ${CRYPTO_SRC} + ${OPENSSL_BINARY_DIR}/crypto/loongarch64cpuid.S + ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c + ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_asm.c + ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c + ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_dgst.c + ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_nistz256.c + ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c + ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c + ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c + ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c + ${OPENSSL_SOURCE_DIR}/crypto/loongarchcap.c + ) endif() set(SSL_SRC diff --git a/contrib/openssl-cmake/linux_loongarch64/include/crypto/bn_conf.h b/contrib/openssl-cmake/linux_loongarch64/include/crypto/bn_conf.h new file mode 100644 index 00000000000..0347a6ddc06 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/crypto/bn_conf.h @@ -0,0 +1,29 @@ +/* WARNING: do not edit! 
*/ +/* Generated by Makefile from include/crypto/bn_conf.h.in */ +/* + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OSSL_CRYPTO_BN_CONF_H +# define OSSL_CRYPTO_BN_CONF_H +# pragma once + +/* + * The contents of this file are not used in the UEFI build, as + * both 32-bit and 64-bit builds are supported from a single run + * of the Configure script. + */ + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/crypto/dso_conf.h b/contrib/openssl-cmake/linux_loongarch64/include/crypto/dso_conf.h new file mode 100644 index 00000000000..795dfa0f1a6 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/crypto/dso_conf.h @@ -0,0 +1,19 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from include/crypto/dso_conf.h.in */ +/* + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OSSL_CRYPTO_DSO_CONF_H +# define OSSL_CRYPTO_DSO_CONF_H +# pragma once + +# define DSO_DLFCN +# define HAVE_DLFCN_H +# define DSO_EXTENSION ".so" +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/internal/param_names.h b/contrib/openssl-cmake/linux_loongarch64/include/internal/param_names.h new file mode 100644 index 00000000000..e721d071617 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/internal/param_names.h @@ -0,0 +1,376 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/internal/param_names.h.in + * + * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +int ossl_param_find_pidx(const char *s); + +/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */ +#define NUM_PIDX 290 + +#define PIDX_ALG_PARAM_CIPHER 0 +#define PIDX_ALG_PARAM_DIGEST 1 +#define PIDX_ALG_PARAM_ENGINE 2 +#define PIDX_ALG_PARAM_MAC 3 +#define PIDX_ALG_PARAM_PROPERTIES 4 +#define PIDX_ASYM_CIPHER_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_ENGINE PIDX_PKEY_PARAM_ENGINE +#define PIDX_ASYM_CIPHER_PARAM_IMPLICIT_REJECTION 5 +#define PIDX_ASYM_CIPHER_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_MGF1_DIGEST_PROPS PIDX_PKEY_PARAM_MGF1_PROPERTIES +#define PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS 6 +#define PIDX_ASYM_CIPHER_PARAM_OAEP_LABEL 7 +#define PIDX_ASYM_CIPHER_PARAM_PAD_MODE PIDX_PKEY_PARAM_PAD_MODE +#define PIDX_ASYM_CIPHER_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION 8 +#define PIDX_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION 9 +#define PIDX_CAPABILITY_TLS_GROUP_ALG 10 +#define PIDX_CAPABILITY_TLS_GROUP_ID 11 +#define PIDX_CAPABILITY_TLS_GROUP_IS_KEM 12 +#define PIDX_CAPABILITY_TLS_GROUP_MAX_DTLS 13 +#define PIDX_CAPABILITY_TLS_GROUP_MAX_TLS 14 +#define PIDX_CAPABILITY_TLS_GROUP_MIN_DTLS 15 +#define PIDX_CAPABILITY_TLS_GROUP_MIN_TLS 16 +#define PIDX_CAPABILITY_TLS_GROUP_NAME 17 +#define PIDX_CAPABILITY_TLS_GROUP_NAME_INTERNAL 18 +#define PIDX_CAPABILITY_TLS_GROUP_SECURITY_BITS 19 +#define PIDX_CAPABILITY_TLS_SIGALG_CODE_POINT 20 +#define PIDX_CAPABILITY_TLS_SIGALG_HASH_NAME 21 +#define PIDX_CAPABILITY_TLS_SIGALG_HASH_OID 22 +#define PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME 23 +#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE 24 +#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID 25 +#define PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS 14 +#define PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS 16 +#define PIDX_CAPABILITY_TLS_SIGALG_NAME 26 +#define PIDX_CAPABILITY_TLS_SIGALG_OID 27 +#define PIDX_CAPABILITY_TLS_SIGALG_SECURITY_BITS 28 +#define PIDX_CAPABILITY_TLS_SIGALG_SIG_NAME 29 +#define PIDX_CAPABILITY_TLS_SIGALG_SIG_OID 30 +#define PIDX_CIPHER_PARAM_AEAD 31 +#define PIDX_CIPHER_PARAM_AEAD_IVLEN PIDX_CIPHER_PARAM_IVLEN +#define PIDX_CIPHER_PARAM_AEAD_MAC_KEY 32 +#define PIDX_CIPHER_PARAM_AEAD_TAG 33 +#define PIDX_CIPHER_PARAM_AEAD_TAGLEN 34 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_AAD 35 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_AAD_PAD 36 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN 37 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_IV_FIXED 38 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV 39 +#define PIDX_CIPHER_PARAM_ALGORITHM_ID_PARAMS 40 +#define PIDX_CIPHER_PARAM_BLOCK_SIZE 41 +#define PIDX_CIPHER_PARAM_CTS 42 +#define PIDX_CIPHER_PARAM_CTS_MODE 43 +#define PIDX_CIPHER_PARAM_CUSTOM_IV 44 +#define PIDX_CIPHER_PARAM_HAS_RAND_KEY 45 +#define PIDX_CIPHER_PARAM_IV 46 +#define PIDX_CIPHER_PARAM_IVLEN 47 +#define PIDX_CIPHER_PARAM_KEYLEN 48 +#define PIDX_CIPHER_PARAM_MODE 49 +#define PIDX_CIPHER_PARAM_NUM 50 +#define PIDX_CIPHER_PARAM_PADDING 51 +#define PIDX_CIPHER_PARAM_RANDOM_KEY 52 +#define PIDX_CIPHER_PARAM_RC2_KEYBITS 53 +#define PIDX_CIPHER_PARAM_ROUNDS 54 +#define PIDX_CIPHER_PARAM_SPEED 55 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 56 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 57 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 58 +#define 
PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 59 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 60 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 61 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 62 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 63 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 64 +#define PIDX_CIPHER_PARAM_TLS_MAC 65 +#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 66 +#define PIDX_CIPHER_PARAM_TLS_VERSION 67 +#define PIDX_CIPHER_PARAM_UPDATED_IV 68 +#define PIDX_CIPHER_PARAM_USE_BITS 69 +#define PIDX_CIPHER_PARAM_XTS_STANDARD 70 +#define PIDX_DECODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_DIGEST_PARAM_ALGID_ABSENT 71 +#define PIDX_DIGEST_PARAM_BLOCK_SIZE 41 +#define PIDX_DIGEST_PARAM_MICALG 72 +#define PIDX_DIGEST_PARAM_PAD_TYPE 73 +#define PIDX_DIGEST_PARAM_SIZE 74 +#define PIDX_DIGEST_PARAM_SSL3_MS 75 +#define PIDX_DIGEST_PARAM_XOF 76 +#define PIDX_DIGEST_PARAM_XOFLEN 77 +#define PIDX_DRBG_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_DRBG_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 78 +#define PIDX_DRBG_PARAM_MAC PIDX_ALG_PARAM_MAC +#define PIDX_DRBG_PARAM_MAX_ADINLEN 79 +#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 80 +#define PIDX_DRBG_PARAM_MAX_LENGTH 81 +#define PIDX_DRBG_PARAM_MAX_NONCELEN 82 +#define PIDX_DRBG_PARAM_MAX_PERSLEN 83 +#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 84 +#define PIDX_DRBG_PARAM_MIN_LENGTH 85 +#define PIDX_DRBG_PARAM_MIN_NONCELEN 86 +#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 87 +#define PIDX_DRBG_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_DRBG_PARAM_RANDOM_DATA 88 +#define PIDX_DRBG_PARAM_RESEED_COUNTER 89 +#define PIDX_DRBG_PARAM_RESEED_REQUESTS 90 +#define PIDX_DRBG_PARAM_RESEED_TIME 91 +#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 92 +#define PIDX_DRBG_PARAM_SIZE 74 +#define PIDX_DRBG_PARAM_USE_DF 93 +#define PIDX_ENCODER_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 94 +#define PIDX_ENCODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 95 +#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 96 +#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 97 +#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 98 +#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 99 +#define PIDX_EXCHANGE_PARAM_KDF_TYPE 100 +#define PIDX_EXCHANGE_PARAM_KDF_UKM 101 +#define PIDX_EXCHANGE_PARAM_PAD 102 +#define PIDX_GEN_PARAM_ITERATION 103 +#define PIDX_GEN_PARAM_POTENTIAL 104 +#define PIDX_KDF_PARAM_ARGON2_AD 105 +#define PIDX_KDF_PARAM_ARGON2_LANES 106 +#define PIDX_KDF_PARAM_ARGON2_MEMCOST 107 +#define PIDX_KDF_PARAM_ARGON2_VERSION 108 +#define PIDX_KDF_PARAM_CEK_ALG 109 +#define PIDX_KDF_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_KDF_PARAM_CONSTANT 110 +#define PIDX_KDF_PARAM_DATA 111 +#define PIDX_KDF_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_KDF_PARAM_EARLY_CLEAN 112 +#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 113 +#define PIDX_KDF_PARAM_HMACDRBG_NONCE 114 +#define PIDX_KDF_PARAM_INFO 115 +#define PIDX_KDF_PARAM_ITER 116 +#define PIDX_KDF_PARAM_KBKDF_R 117 +#define PIDX_KDF_PARAM_KBKDF_USE_L 118 +#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 119 +#define PIDX_KDF_PARAM_KEY 120 +#define PIDX_KDF_PARAM_LABEL 121 +#define PIDX_KDF_PARAM_MAC PIDX_ALG_PARAM_MAC +#define PIDX_KDF_PARAM_MAC_SIZE 122 +#define PIDX_KDF_PARAM_MODE 49 +#define PIDX_KDF_PARAM_PASSWORD 123 +#define PIDX_KDF_PARAM_PKCS12_ID 124 +#define PIDX_KDF_PARAM_PKCS5 125 +#define PIDX_KDF_PARAM_PREFIX 126 +#define PIDX_KDF_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define 
PIDX_KDF_PARAM_SALT 127 +#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 128 +#define PIDX_KDF_PARAM_SCRYPT_N 129 +#define PIDX_KDF_PARAM_SCRYPT_P 130 +#define PIDX_KDF_PARAM_SCRYPT_R 117 +#define PIDX_KDF_PARAM_SECRET 131 +#define PIDX_KDF_PARAM_SEED 132 +#define PIDX_KDF_PARAM_SIZE 74 +#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 133 +#define PIDX_KDF_PARAM_SSHKDF_TYPE 134 +#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 135 +#define PIDX_KDF_PARAM_THREADS 136 +#define PIDX_KDF_PARAM_UKM 137 +#define PIDX_KDF_PARAM_X942_ACVPINFO 138 +#define PIDX_KDF_PARAM_X942_PARTYUINFO 139 +#define PIDX_KDF_PARAM_X942_PARTYVINFO 140 +#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 141 +#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 142 +#define PIDX_KDF_PARAM_X942_USE_KEYBITS 143 +#define PIDX_KEM_PARAM_IKME 144 +#define PIDX_KEM_PARAM_OPERATION 145 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 146 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 147 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 148 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE 49 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 149 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 150 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 151 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 152 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 153 +#define PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 154 +#define PIDX_MAC_PARAM_BLOCK_SIZE 155 +#define PIDX_MAC_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_MAC_PARAM_CUSTOM 156 +#define PIDX_MAC_PARAM_C_ROUNDS 157 +#define PIDX_MAC_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_MAC_PARAM_DIGEST_NOINIT 158 +#define PIDX_MAC_PARAM_DIGEST_ONESHOT 159 +#define PIDX_MAC_PARAM_D_ROUNDS 160 +#define PIDX_MAC_PARAM_IV 46 +#define PIDX_MAC_PARAM_KEY 120 +#define PIDX_MAC_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_MAC_PARAM_SALT 127 +#define PIDX_MAC_PARAM_SIZE 74 +#define PIDX_MAC_PARAM_TLS_DATA_SIZE 161 +#define PIDX_MAC_PARAM_XOF 76 +#define PIDX_OBJECT_PARAM_DATA 111 +#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 162 +#define PIDX_OBJECT_PARAM_DATA_TYPE 163 +#define PIDX_OBJECT_PARAM_DESC 164 +#define PIDX_OBJECT_PARAM_REFERENCE 165 +#define PIDX_OBJECT_PARAM_TYPE 134 +#define PIDX_PASSPHRASE_PARAM_INFO 115 +#define PIDX_PKEY_PARAM_BITS 166 +#define PIDX_PKEY_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 167 +#define PIDX_PKEY_PARAM_DHKEM_IKM 168 +#define PIDX_PKEY_PARAM_DH_GENERATOR 169 +#define PIDX_PKEY_PARAM_DH_PRIV_LEN 170 +#define PIDX_PKEY_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_PKEY_PARAM_DIGEST_SIZE 171 +#define PIDX_PKEY_PARAM_DIST_ID 172 +#define PIDX_PKEY_PARAM_EC_A 173 +#define PIDX_PKEY_PARAM_EC_B 174 +#define PIDX_PKEY_PARAM_EC_CHAR2_M 175 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 176 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 177 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 178 +#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 179 +#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 180 +#define PIDX_PKEY_PARAM_EC_COFACTOR 181 +#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 182 +#define PIDX_PKEY_PARAM_EC_ENCODING 183 +#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 184 +#define PIDX_PKEY_PARAM_EC_GENERATOR 185 +#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 186 +#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 187 +#define PIDX_PKEY_PARAM_EC_ORDER 188 +#define PIDX_PKEY_PARAM_EC_P 130 +#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 189 +#define PIDX_PKEY_PARAM_EC_PUB_X 190 +#define PIDX_PKEY_PARAM_EC_PUB_Y 191 +#define PIDX_PKEY_PARAM_EC_SEED 132 +#define 
PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 192 +#define PIDX_PKEY_PARAM_ENGINE PIDX_ALG_PARAM_ENGINE +#define PIDX_PKEY_PARAM_FFC_COFACTOR 193 +#define PIDX_PKEY_PARAM_FFC_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_PKEY_PARAM_FFC_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_FFC_G 194 +#define PIDX_PKEY_PARAM_FFC_GINDEX 195 +#define PIDX_PKEY_PARAM_FFC_H 196 +#define PIDX_PKEY_PARAM_FFC_P 130 +#define PIDX_PKEY_PARAM_FFC_PBITS 197 +#define PIDX_PKEY_PARAM_FFC_PCOUNTER 198 +#define PIDX_PKEY_PARAM_FFC_Q 199 +#define PIDX_PKEY_PARAM_FFC_QBITS 200 +#define PIDX_PKEY_PARAM_FFC_SEED 132 +#define PIDX_PKEY_PARAM_FFC_TYPE 134 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 201 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 202 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 203 +#define PIDX_PKEY_PARAM_GROUP_NAME 204 +#define PIDX_PKEY_PARAM_IMPLICIT_REJECTION 5 +#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 205 +#define PIDX_PKEY_PARAM_MASKGENFUNC 206 +#define PIDX_PKEY_PARAM_MAX_SIZE 207 +#define PIDX_PKEY_PARAM_MGF1_DIGEST 208 +#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 209 +#define PIDX_PKEY_PARAM_PAD_MODE 210 +#define PIDX_PKEY_PARAM_PRIV_KEY 211 +#define PIDX_PKEY_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_PUB_KEY 212 +#define PIDX_PKEY_PARAM_RSA_BITS PIDX_PKEY_PARAM_BITS +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 213 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 214 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 215 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 216 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 217 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 218 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 219 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 220 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 221 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 222 +#define PIDX_PKEY_PARAM_RSA_D 223 +#define PIDX_PKEY_PARAM_RSA_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_PKEY_PARAM_RSA_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_RSA_E 224 +#define PIDX_PKEY_PARAM_RSA_EXPONENT 225 +#define PIDX_PKEY_PARAM_RSA_EXPONENT1 226 +#define PIDX_PKEY_PARAM_RSA_EXPONENT10 227 +#define PIDX_PKEY_PARAM_RSA_EXPONENT2 228 +#define PIDX_PKEY_PARAM_RSA_EXPONENT3 229 +#define PIDX_PKEY_PARAM_RSA_EXPONENT4 230 +#define PIDX_PKEY_PARAM_RSA_EXPONENT5 231 +#define PIDX_PKEY_PARAM_RSA_EXPONENT6 232 +#define PIDX_PKEY_PARAM_RSA_EXPONENT7 233 +#define PIDX_PKEY_PARAM_RSA_EXPONENT8 234 +#define PIDX_PKEY_PARAM_RSA_EXPONENT9 235 +#define PIDX_PKEY_PARAM_RSA_FACTOR 236 +#define PIDX_PKEY_PARAM_RSA_FACTOR1 237 +#define PIDX_PKEY_PARAM_RSA_FACTOR10 238 +#define PIDX_PKEY_PARAM_RSA_FACTOR2 239 +#define PIDX_PKEY_PARAM_RSA_FACTOR3 240 +#define PIDX_PKEY_PARAM_RSA_FACTOR4 241 +#define PIDX_PKEY_PARAM_RSA_FACTOR5 242 +#define PIDX_PKEY_PARAM_RSA_FACTOR6 243 +#define PIDX_PKEY_PARAM_RSA_FACTOR7 244 +#define PIDX_PKEY_PARAM_RSA_FACTOR8 245 +#define PIDX_PKEY_PARAM_RSA_FACTOR9 246 +#define PIDX_PKEY_PARAM_RSA_MASKGENFUNC PIDX_PKEY_PARAM_MASKGENFUNC +#define PIDX_PKEY_PARAM_RSA_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_PKEY_PARAM_RSA_N 129 +#define PIDX_PKEY_PARAM_RSA_PRIMES 247 +#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 248 +#define PIDX_PKEY_PARAM_RSA_TEST_P1 249 +#define PIDX_PKEY_PARAM_RSA_TEST_P2 250 +#define PIDX_PKEY_PARAM_RSA_TEST_Q1 251 +#define PIDX_PKEY_PARAM_RSA_TEST_Q2 252 +#define PIDX_PKEY_PARAM_RSA_TEST_XP 253 +#define PIDX_PKEY_PARAM_RSA_TEST_XP1 254 +#define PIDX_PKEY_PARAM_RSA_TEST_XP2 255 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ 256 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 257 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 
258 +#define PIDX_PKEY_PARAM_SECURITY_BITS 259 +#define PIDX_PKEY_PARAM_USE_COFACTOR_ECDH PIDX_PKEY_PARAM_USE_COFACTOR_FLAG +#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 260 +#define PIDX_PROV_PARAM_BUILDINFO 261 +#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 262 +#define PIDX_PROV_PARAM_CORE_PROV_NAME 263 +#define PIDX_PROV_PARAM_CORE_VERSION 264 +#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 265 +#define PIDX_PROV_PARAM_NAME 266 +#define PIDX_PROV_PARAM_SECURITY_CHECKS 267 +#define PIDX_PROV_PARAM_SELF_TEST_DESC 268 +#define PIDX_PROV_PARAM_SELF_TEST_PHASE 269 +#define PIDX_PROV_PARAM_SELF_TEST_TYPE 270 +#define PIDX_PROV_PARAM_STATUS 271 +#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 272 +#define PIDX_PROV_PARAM_VERSION 108 +#define PIDX_RAND_PARAM_GENERATE 273 +#define PIDX_RAND_PARAM_MAX_REQUEST 274 +#define PIDX_RAND_PARAM_STATE 275 +#define PIDX_RAND_PARAM_STRENGTH 276 +#define PIDX_RAND_PARAM_TEST_ENTROPY 277 +#define PIDX_RAND_PARAM_TEST_NONCE 278 +#define PIDX_SIGNATURE_PARAM_ALGORITHM_ID 279 +#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 280 +#define PIDX_SIGNATURE_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_SIGNATURE_PARAM_DIGEST_SIZE PIDX_PKEY_PARAM_DIGEST_SIZE +#define PIDX_SIGNATURE_PARAM_INSTANCE 281 +#define PIDX_SIGNATURE_PARAM_KAT 282 +#define PIDX_SIGNATURE_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_SIGNATURE_PARAM_MGF1_PROPERTIES PIDX_PKEY_PARAM_MGF1_PROPERTIES +#define PIDX_SIGNATURE_PARAM_NONCE_TYPE 283 +#define PIDX_SIGNATURE_PARAM_PAD_MODE PIDX_PKEY_PARAM_PAD_MODE +#define PIDX_SIGNATURE_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 248 +#define PIDX_STORE_PARAM_ALIAS 284 +#define PIDX_STORE_PARAM_DIGEST 1 +#define PIDX_STORE_PARAM_EXPECT 285 +#define PIDX_STORE_PARAM_FINGERPRINT 286 +#define PIDX_STORE_PARAM_INPUT_TYPE 287 +#define PIDX_STORE_PARAM_ISSUER 266 +#define PIDX_STORE_PARAM_PROPERTIES 4 +#define PIDX_STORE_PARAM_SERIAL 288 +#define PIDX_STORE_PARAM_SUBJECT 289 diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1.h new file mode 100644 index 00000000000..09712345941 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1.h @@ -0,0 +1,1133 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/asn1.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ASN1_H +# define OPENSSL_ASN1_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_ASN1_H +# endif + +# ifndef OPENSSL_NO_STDIO +# include +# endif +# include +# include +# include +# include +# include +# include +# include + +# include +# include + +# ifdef OPENSSL_BUILD_SHLIBCRYPTO +# undef OPENSSL_EXTERN +# define OPENSSL_EXTERN OPENSSL_EXPORT +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define V_ASN1_UNIVERSAL 0x00 +# define V_ASN1_APPLICATION 0x40 +# define V_ASN1_CONTEXT_SPECIFIC 0x80 +# define V_ASN1_PRIVATE 0xc0 + +# define V_ASN1_CONSTRUCTED 0x20 +# define V_ASN1_PRIMITIVE_TAG 0x1f +# define V_ASN1_PRIMATIVE_TAG /*compat*/ V_ASN1_PRIMITIVE_TAG + +# define V_ASN1_APP_CHOOSE -2/* let the recipient choose */ +# define V_ASN1_OTHER -3/* used in ASN1_TYPE */ +# define V_ASN1_ANY -4/* used in ASN1 template code */ + +# define V_ASN1_UNDEF -1 +/* ASN.1 tag values */ +# define V_ASN1_EOC 0 +# define V_ASN1_BOOLEAN 1 /**/ +# define V_ASN1_INTEGER 2 +# define V_ASN1_BIT_STRING 3 +# define V_ASN1_OCTET_STRING 4 +# define V_ASN1_NULL 5 +# define V_ASN1_OBJECT 6 +# define V_ASN1_OBJECT_DESCRIPTOR 7 +# define V_ASN1_EXTERNAL 8 +# define V_ASN1_REAL 9 +# define V_ASN1_ENUMERATED 10 +# define V_ASN1_UTF8STRING 12 +# define V_ASN1_SEQUENCE 16 +# define V_ASN1_SET 17 +# define V_ASN1_NUMERICSTRING 18 /**/ +# define V_ASN1_PRINTABLESTRING 19 +# define V_ASN1_T61STRING 20 +# define V_ASN1_TELETEXSTRING 20/* alias */ +# define V_ASN1_VIDEOTEXSTRING 21 /**/ +# define V_ASN1_IA5STRING 22 +# define V_ASN1_UTCTIME 23 +# define V_ASN1_GENERALIZEDTIME 24 /**/ +# define V_ASN1_GRAPHICSTRING 25 /**/ +# define V_ASN1_ISO64STRING 26 /**/ +# define V_ASN1_VISIBLESTRING 26/* alias */ +# define V_ASN1_GENERALSTRING 27 /**/ +# define V_ASN1_UNIVERSALSTRING 28 /**/ +# define V_ASN1_BMPSTRING 30 + +/* + * NB the constants below are used internally by ASN1_INTEGER + * and ASN1_ENUMERATED to indicate the sign. They are *not* on + * the wire tag values. 
+ */ + +# define V_ASN1_NEG 0x100 +# define V_ASN1_NEG_INTEGER (2 | V_ASN1_NEG) +# define V_ASN1_NEG_ENUMERATED (10 | V_ASN1_NEG) + +/* For use with d2i_ASN1_type_bytes() */ +# define B_ASN1_NUMERICSTRING 0x0001 +# define B_ASN1_PRINTABLESTRING 0x0002 +# define B_ASN1_T61STRING 0x0004 +# define B_ASN1_TELETEXSTRING 0x0004 +# define B_ASN1_VIDEOTEXSTRING 0x0008 +# define B_ASN1_IA5STRING 0x0010 +# define B_ASN1_GRAPHICSTRING 0x0020 +# define B_ASN1_ISO64STRING 0x0040 +# define B_ASN1_VISIBLESTRING 0x0040 +# define B_ASN1_GENERALSTRING 0x0080 +# define B_ASN1_UNIVERSALSTRING 0x0100 +# define B_ASN1_OCTET_STRING 0x0200 +# define B_ASN1_BIT_STRING 0x0400 +# define B_ASN1_BMPSTRING 0x0800 +# define B_ASN1_UNKNOWN 0x1000 +# define B_ASN1_UTF8STRING 0x2000 +# define B_ASN1_UTCTIME 0x4000 +# define B_ASN1_GENERALIZEDTIME 0x8000 +# define B_ASN1_SEQUENCE 0x10000 +/* For use with ASN1_mbstring_copy() */ +# define MBSTRING_FLAG 0x1000 +# define MBSTRING_UTF8 (MBSTRING_FLAG) +# define MBSTRING_ASC (MBSTRING_FLAG|1) +# define MBSTRING_BMP (MBSTRING_FLAG|2) +# define MBSTRING_UNIV (MBSTRING_FLAG|4) +# define SMIME_OLDMIME 0x400 +# define SMIME_CRLFEOL 0x800 +# define SMIME_STREAM 0x1000 + +/* Stacks for types not otherwise defined in this header */ +SKM_DEFINE_STACK_OF_INTERNAL(X509_ALGOR, X509_ALGOR, X509_ALGOR) +#define sk_X509_ALGOR_num(sk) OPENSSL_sk_num(ossl_check_const_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_value(sk, idx) ((X509_ALGOR *)OPENSSL_sk_value(ossl_check_const_X509_ALGOR_sk_type(sk), (idx))) +#define sk_X509_ALGOR_new(cmp) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new(ossl_check_X509_ALGOR_compfunc_type(cmp))) +#define sk_X509_ALGOR_new_null() ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new_null()) +#define sk_X509_ALGOR_new_reserve(cmp, n) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new_reserve(ossl_check_X509_ALGOR_compfunc_type(cmp), (n))) +#define sk_X509_ALGOR_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_ALGOR_sk_type(sk), (n)) +#define sk_X509_ALGOR_free(sk) OPENSSL_sk_free(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_zero(sk) OPENSSL_sk_zero(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_delete(sk, i) ((X509_ALGOR *)OPENSSL_sk_delete(ossl_check_X509_ALGOR_sk_type(sk), (i))) +#define sk_X509_ALGOR_delete_ptr(sk, ptr) ((X509_ALGOR *)OPENSSL_sk_delete_ptr(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr))) +#define sk_X509_ALGOR_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_pop(sk) ((X509_ALGOR *)OPENSSL_sk_pop(ossl_check_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_shift(sk) ((X509_ALGOR *)OPENSSL_sk_shift(ossl_check_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_ALGOR_sk_type(sk),ossl_check_X509_ALGOR_freefunc_type(freefunc)) +#define sk_X509_ALGOR_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr), (idx)) +#define sk_X509_ALGOR_set(sk, idx, ptr) ((X509_ALGOR *)OPENSSL_sk_set(ossl_check_X509_ALGOR_sk_type(sk), (idx), ossl_check_X509_ALGOR_type(ptr))) +#define sk_X509_ALGOR_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_find_all(sk, ptr, 
pnum) OPENSSL_sk_find_all(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr), pnum) +#define sk_X509_ALGOR_sort(sk) OPENSSL_sk_sort(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_dup(sk) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_dup(ossl_check_const_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_copyfunc_type(copyfunc), ossl_check_X509_ALGOR_freefunc_type(freefunc))) +#define sk_X509_ALGOR_set_cmp_func(sk, cmp) ((sk_X509_ALGOR_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_compfunc_type(cmp))) + + + +# define ASN1_STRING_FLAG_BITS_LEFT 0x08 /* Set if 0x07 has bits left value */ +/* + * This indicates that the ASN1_STRING is not a real value but just a place + * holder for the location where indefinite length constructed data should be + * inserted in the memory buffer + */ +# define ASN1_STRING_FLAG_NDEF 0x010 + +/* + * This flag is used by the CMS code to indicate that a string is not + * complete and is a place holder for content when it had all been accessed. + * The flag will be reset when content has been written to it. + */ + +# define ASN1_STRING_FLAG_CONT 0x020 +/* + * This flag is used by ASN1 code to indicate an ASN1_STRING is an MSTRING + * type. + */ +# define ASN1_STRING_FLAG_MSTRING 0x040 +/* String is embedded and only content should be freed */ +# define ASN1_STRING_FLAG_EMBED 0x080 +/* String should be parsed in RFC 5280's time format */ +# define ASN1_STRING_FLAG_X509_TIME 0x100 +/* This is the base type that holds just about everything :-) */ +struct asn1_string_st { + int length; + int type; + unsigned char *data; + /* + * The value of the following field depends on the type being held. It + * is mostly being used for BIT_STRING so if the input data has a + * non-zero 'unused bits' value, it will be handled correctly + */ + long flags; +}; + +/* + * ASN1_ENCODING structure: this is used to save the received encoding of an + * ASN1 type. This is useful to get round problems with invalid encodings + * which can break signatures. + */ + +typedef struct ASN1_ENCODING_st { + unsigned char *enc; /* DER encoding */ + long len; /* Length of encoding */ + int modified; /* set to 1 if 'enc' is invalid */ +} ASN1_ENCODING; + +/* Used with ASN1 LONG type: if a long is set to this it is omitted */ +# define ASN1_LONG_UNDEF 0x7fffffffL + +# define STABLE_FLAGS_MALLOC 0x01 +/* + * A zero passed to ASN1_STRING_TABLE_new_add for the flags is interpreted + * as "don't change" and STABLE_FLAGS_MALLOC is always set. By setting + * STABLE_FLAGS_MALLOC only we can clear the existing value. Use the alias + * STABLE_FLAGS_CLEAR to reflect this. 
+ */ +# define STABLE_FLAGS_CLEAR STABLE_FLAGS_MALLOC +# define STABLE_NO_MASK 0x02 +# define DIRSTRING_TYPE \ + (B_ASN1_PRINTABLESTRING|B_ASN1_T61STRING|B_ASN1_BMPSTRING|B_ASN1_UTF8STRING) +# define PKCS9STRING_TYPE (DIRSTRING_TYPE|B_ASN1_IA5STRING) + +struct asn1_string_table_st { + int nid; + long minsize; + long maxsize; + unsigned long mask; + unsigned long flags; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_STRING_TABLE, ASN1_STRING_TABLE, ASN1_STRING_TABLE) +#define sk_ASN1_STRING_TABLE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_value(sk, idx) ((ASN1_STRING_TABLE *)OPENSSL_sk_value(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk), (idx))) +#define sk_ASN1_STRING_TABLE_new(cmp) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new(ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp))) +#define sk_ASN1_STRING_TABLE_new_null() ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_STRING_TABLE_new_reserve(cmp, n) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp), (n))) +#define sk_ASN1_STRING_TABLE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (n)) +#define sk_ASN1_STRING_TABLE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_delete(sk, i) ((ASN1_STRING_TABLE *)OPENSSL_sk_delete(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (i))) +#define sk_ASN1_STRING_TABLE_delete_ptr(sk, ptr) ((ASN1_STRING_TABLE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr))) +#define sk_ASN1_STRING_TABLE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_pop(sk) ((ASN1_STRING_TABLE *)OPENSSL_sk_pop(ossl_check_ASN1_STRING_TABLE_sk_type(sk))) +#define sk_ASN1_STRING_TABLE_shift(sk) ((ASN1_STRING_TABLE *)OPENSSL_sk_shift(ossl_check_ASN1_STRING_TABLE_sk_type(sk))) +#define sk_ASN1_STRING_TABLE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_STRING_TABLE_sk_type(sk),ossl_check_ASN1_STRING_TABLE_freefunc_type(freefunc)) +#define sk_ASN1_STRING_TABLE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr), (idx)) +#define sk_ASN1_STRING_TABLE_set(sk, idx, ptr) ((ASN1_STRING_TABLE *)OPENSSL_sk_set(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (idx), ossl_check_ASN1_STRING_TABLE_type(ptr))) +#define sk_ASN1_STRING_TABLE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr), pnum) +#define sk_ASN1_STRING_TABLE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_dup(sk) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk))) +#define 
sk_ASN1_STRING_TABLE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_copyfunc_type(copyfunc), ossl_check_ASN1_STRING_TABLE_freefunc_type(freefunc))) +#define sk_ASN1_STRING_TABLE_set_cmp_func(sk, cmp) ((sk_ASN1_STRING_TABLE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp))) + + +/* size limits: this stuff is taken straight from RFC2459 */ + +# define ub_name 32768 +# define ub_common_name 64 +# define ub_locality_name 128 +# define ub_state_name 128 +# define ub_organization_name 64 +# define ub_organization_unit_name 64 +# define ub_title 64 +# define ub_email_address 128 + +/* + * Declarations for template structures: for full definitions see asn1t.h + */ +typedef struct ASN1_TEMPLATE_st ASN1_TEMPLATE; +typedef struct ASN1_TLC_st ASN1_TLC; +/* This is just an opaque pointer */ +typedef struct ASN1_VALUE_st ASN1_VALUE; + +/* Declare ASN1 functions: the implement macro in in asn1t.h */ + +/* + * The mysterious 'extern' that's passed to some macros is innocuous, + * and is there to quiet pre-C99 compilers that may complain about empty + * arguments in macro calls. + */ + +# define DECLARE_ASN1_FUNCTIONS_attr(attr, type) \ + DECLARE_ASN1_FUNCTIONS_name_attr(attr, type, type) +# define DECLARE_ASN1_FUNCTIONS(type) \ + DECLARE_ASN1_FUNCTIONS_attr(extern, type) + +# define DECLARE_ASN1_ALLOC_FUNCTIONS_attr(attr, type) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, type) +# define DECLARE_ASN1_ALLOC_FUNCTIONS(type) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_attr(extern, type) + +# define DECLARE_ASN1_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(attr, type, name) +# define DECLARE_ASN1_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_attr(attr, type, itname, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(attr, type, name) \ + DECLARE_ASN1_ITEM_attr(attr, itname) +# define DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_attr(extern, type, itname, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_attr(attr, type, name, name) +# define DECLARE_ASN1_ENCODE_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(attr, type, name) \ + attr type *d2i_##name(type **a, const unsigned char **in, long len); \ + attr int i2d_##name(const type *a, unsigned char **out); +# define DECLARE_ASN1_ENCODE_FUNCTIONS_only(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(extern, type, name) + +# define DECLARE_ASN1_NDEF_FUNCTION_attr(attr, name) \ + attr int i2d_##name##_NDEF(const name *a, unsigned char **out); +# define DECLARE_ASN1_NDEF_FUNCTION(name) \ + DECLARE_ASN1_NDEF_FUNCTION_attr(extern, name) + +# define DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, name) \ + attr type *name##_new(void); \ + attr void name##_free(type *a); +# define DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_DUP_FUNCTION_attr(attr, type) \ + DECLARE_ASN1_DUP_FUNCTION_name_attr(attr, type, type) +# define DECLARE_ASN1_DUP_FUNCTION(type) \ + DECLARE_ASN1_DUP_FUNCTION_attr(extern, type) + +# define 
DECLARE_ASN1_DUP_FUNCTION_name_attr(attr, type, name) \ + attr type *name##_dup(const type *a); +# define DECLARE_ASN1_DUP_FUNCTION_name(type, name) \ + DECLARE_ASN1_DUP_FUNCTION_name_attr(extern, type, name) + +# define DECLARE_ASN1_PRINT_FUNCTION_attr(attr, stname) \ + DECLARE_ASN1_PRINT_FUNCTION_fname_attr(attr, stname, stname) +# define DECLARE_ASN1_PRINT_FUNCTION(stname) \ + DECLARE_ASN1_PRINT_FUNCTION_attr(extern, stname) + +# define DECLARE_ASN1_PRINT_FUNCTION_fname_attr(attr, stname, fname) \ + attr int fname##_print_ctx(BIO *out, const stname *x, int indent, \ + const ASN1_PCTX *pctx); +# define DECLARE_ASN1_PRINT_FUNCTION_fname(stname, fname) \ + DECLARE_ASN1_PRINT_FUNCTION_fname_attr(extern, stname, fname) + +# define D2I_OF(type) type *(*)(type **,const unsigned char **,long) +# define I2D_OF(type) int (*)(const type *,unsigned char **) + +# define CHECKED_D2I_OF(type, d2i) \ + ((d2i_of_void*) (1 ? d2i : ((D2I_OF(type))0))) +# define CHECKED_I2D_OF(type, i2d) \ + ((i2d_of_void*) (1 ? i2d : ((I2D_OF(type))0))) +# define CHECKED_NEW_OF(type, xnew) \ + ((void *(*)(void)) (1 ? xnew : ((type *(*)(void))0))) +# define CHECKED_PTR_OF(type, p) \ + ((void*) (1 ? p : (type*)0)) +# define CHECKED_PPTR_OF(type, p) \ + ((void**) (1 ? p : (type**)0)) + +# define TYPEDEF_D2I_OF(type) typedef type *d2i_of_##type(type **,const unsigned char **,long) +# define TYPEDEF_I2D_OF(type) typedef int i2d_of_##type(const type *,unsigned char **) +# define TYPEDEF_D2I2D_OF(type) TYPEDEF_D2I_OF(type); TYPEDEF_I2D_OF(type) + +typedef void *d2i_of_void(void **, const unsigned char **, long); +typedef int i2d_of_void(const void *, unsigned char **); + +/*- + * The following macros and typedefs allow an ASN1_ITEM + * to be embedded in a structure and referenced. Since + * the ASN1_ITEM pointers need to be globally accessible + * (possibly from shared libraries) they may exist in + * different forms. On platforms that support it the + * ASN1_ITEM structure itself will be globally exported. + * Other platforms will export a function that returns + * an ASN1_ITEM pointer. + * + * To handle both cases transparently the macros below + * should be used instead of hard coding an ASN1_ITEM + * pointer in a structure. + * + * The structure will look like this: + * + * typedef struct SOMETHING_st { + * ... + * ASN1_ITEM_EXP *iptr; + * ... + * } SOMETHING; + * + * It would be initialised as e.g.: + * + * SOMETHING somevar = {...,ASN1_ITEM_ref(X509),...}; + * + * and the actual pointer extracted with: + * + * const ASN1_ITEM *it = ASN1_ITEM_ptr(somevar.iptr); + * + * Finally an ASN1_ITEM pointer can be extracted from an + * appropriate reference with: ASN1_ITEM_rptr(X509). This + * would be used when a function takes an ASN1_ITEM * argument. + * + */ + + +/* + * Platforms that can't easily handle shared global variables are declared as + * functions returning ASN1_ITEM pointers. 
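+ *
+ * Whichever form a platform uses, going through the ASN1_ITEM_ptr(),
+ * ASN1_ITEM_ref() and ASN1_ITEM_rptr() macros declared below keeps
+ * calling code portable across both conventions.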
+ */
+
+/* ASN1_ITEM pointer exported type */
+typedef const ASN1_ITEM *ASN1_ITEM_EXP (void);
+
+/* Macro to obtain ASN1_ITEM pointer from exported type */
+# define ASN1_ITEM_ptr(iptr) (iptr())
+
+/* Macro to include ASN1_ITEM pointer from base type */
+# define ASN1_ITEM_ref(iptr) (iptr##_it)
+
+# define ASN1_ITEM_rptr(ref) (ref##_it())
+
+# define DECLARE_ASN1_ITEM_attr(attr, name) \
+ attr const ASN1_ITEM * name##_it(void);
+# define DECLARE_ASN1_ITEM(name) \
+ DECLARE_ASN1_ITEM_attr(extern, name)
+
+/* Parameters used by ASN1_STRING_print_ex() */
+
+/*
+ * These determine which characters to escape: RFC2253 special characters,
+ * control characters and MSB set characters
+ */
+
+# define ASN1_STRFLGS_ESC_2253 1
+# define ASN1_STRFLGS_ESC_CTRL 2
+# define ASN1_STRFLGS_ESC_MSB 4
+
+/* Lower 8 bits are reserved as an output type specifier */
+# define ASN1_DTFLGS_TYPE_MASK 0x0FUL
+# define ASN1_DTFLGS_RFC822 0x00UL
+# define ASN1_DTFLGS_ISO8601 0x01UL
+
+/*
+ * This flag determines how we do escaping: normally RFC2253 backslash only,
+ * set this to use backslash and quote.
+ */
+
+# define ASN1_STRFLGS_ESC_QUOTE 8
+
+/* These three flags are internal use only. */
+
+/* Character is a valid PrintableString character */
+# define CHARTYPE_PRINTABLESTRING 0x10
+/* Character needs escaping if it is the first character */
+# define CHARTYPE_FIRST_ESC_2253 0x20
+/* Character needs escaping if it is the last character */
+# define CHARTYPE_LAST_ESC_2253 0x40
+
+/*
+ * NB the internal flags are safely reused below by flags handled at the top
+ * level.
+ */
+
+/*
+ * If this is set we convert all character strings to UTF8 first
+ */
+
+# define ASN1_STRFLGS_UTF8_CONVERT 0x10
+
+/*
+ * If this is set we don't attempt to interpret content: just assume all
+ * strings are 1 byte per character. This will produce some pretty odd
+ * looking output!
+ */
+
+# define ASN1_STRFLGS_IGNORE_TYPE 0x20
+
+/* If this is set we include the string type in the output */
+# define ASN1_STRFLGS_SHOW_TYPE 0x40
+
+/*
+ * This determines which strings to display and which to 'dump' (hex dump of
+ * content octets or DER encoding). We can only dump non-character strings or
+ * everything. If we don't dump 'unknown' they are interpreted as character
+ * strings with 1 octet per character and are subject to the usual escaping
+ * options.
+ */
+
+# define ASN1_STRFLGS_DUMP_ALL 0x80
+# define ASN1_STRFLGS_DUMP_UNKNOWN 0x100
+
+/*
+ * These determine what 'dumping' does: we can dump the content octets or the
+ * DER encoding, both using the RFC2253 #XXXXX notation.
+ */
+
+# define ASN1_STRFLGS_DUMP_DER 0x200
+
+/*
+ * This flag specifies that RFC2254 escaping shall be performed.
+ */
+#define ASN1_STRFLGS_ESC_2254 0x400
+
+/*
+ * All the string flags consistent with RFC2253; escaping control characters
+ * isn't essential in RFC2253 but it is advisable anyway.
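+ *
+ * As a brief illustration (a sketch; `out` and `str` are hypothetical
+ * caller-supplied objects, not part of this header), printing a string
+ * with RFC2253 conventions can use the combined mask defined below:
+ *
+ *     ASN1_STRING_print_ex(out, str, ASN1_STRFLGS_RFC2253);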
+ */ + +# define ASN1_STRFLGS_RFC2253 (ASN1_STRFLGS_ESC_2253 | \ + ASN1_STRFLGS_ESC_CTRL | \ + ASN1_STRFLGS_ESC_MSB | \ + ASN1_STRFLGS_UTF8_CONVERT | \ + ASN1_STRFLGS_DUMP_UNKNOWN | \ + ASN1_STRFLGS_DUMP_DER) + + +struct asn1_type_st { + int type; + union { + char *ptr; + ASN1_BOOLEAN boolean; + ASN1_STRING *asn1_string; + ASN1_OBJECT *object; + ASN1_INTEGER *integer; + ASN1_ENUMERATED *enumerated; + ASN1_BIT_STRING *bit_string; + ASN1_OCTET_STRING *octet_string; + ASN1_PRINTABLESTRING *printablestring; + ASN1_T61STRING *t61string; + ASN1_IA5STRING *ia5string; + ASN1_GENERALSTRING *generalstring; + ASN1_BMPSTRING *bmpstring; + ASN1_UNIVERSALSTRING *universalstring; + ASN1_UTCTIME *utctime; + ASN1_GENERALIZEDTIME *generalizedtime; + ASN1_VISIBLESTRING *visiblestring; + ASN1_UTF8STRING *utf8string; + /* + * set and sequence are left complete and still contain the set or + * sequence bytes + */ + ASN1_STRING *set; + ASN1_STRING *sequence; + ASN1_VALUE *asn1_value; + } value; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_TYPE, ASN1_TYPE, ASN1_TYPE) +#define sk_ASN1_TYPE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_value(sk, idx) ((ASN1_TYPE *)OPENSSL_sk_value(ossl_check_const_ASN1_TYPE_sk_type(sk), (idx))) +#define sk_ASN1_TYPE_new(cmp) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new(ossl_check_ASN1_TYPE_compfunc_type(cmp))) +#define sk_ASN1_TYPE_new_null() ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_TYPE_new_reserve(cmp, n) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_TYPE_compfunc_type(cmp), (n))) +#define sk_ASN1_TYPE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_TYPE_sk_type(sk), (n)) +#define sk_ASN1_TYPE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_delete(sk, i) ((ASN1_TYPE *)OPENSSL_sk_delete(ossl_check_ASN1_TYPE_sk_type(sk), (i))) +#define sk_ASN1_TYPE_delete_ptr(sk, ptr) ((ASN1_TYPE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr))) +#define sk_ASN1_TYPE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_pop(sk) ((ASN1_TYPE *)OPENSSL_sk_pop(ossl_check_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_shift(sk) ((ASN1_TYPE *)OPENSSL_sk_shift(ossl_check_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_TYPE_sk_type(sk),ossl_check_ASN1_TYPE_freefunc_type(freefunc)) +#define sk_ASN1_TYPE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr), (idx)) +#define sk_ASN1_TYPE_set(sk, idx, ptr) ((ASN1_TYPE *)OPENSSL_sk_set(ossl_check_ASN1_TYPE_sk_type(sk), (idx), ossl_check_ASN1_TYPE_type(ptr))) +#define sk_ASN1_TYPE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr), pnum) +#define sk_ASN1_TYPE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_dup(sk) 
((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_copyfunc_type(copyfunc), ossl_check_ASN1_TYPE_freefunc_type(freefunc))) +#define sk_ASN1_TYPE_set_cmp_func(sk, cmp) ((sk_ASN1_TYPE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_compfunc_type(cmp))) + + +typedef STACK_OF(ASN1_TYPE) ASN1_SEQUENCE_ANY; + +DECLARE_ASN1_ENCODE_FUNCTIONS_name(ASN1_SEQUENCE_ANY, ASN1_SEQUENCE_ANY) +DECLARE_ASN1_ENCODE_FUNCTIONS_name(ASN1_SEQUENCE_ANY, ASN1_SET_ANY) + +/* This is used to contain a list of bit names */ +typedef struct BIT_STRING_BITNAME_st { + int bitnum; + const char *lname; + const char *sname; +} BIT_STRING_BITNAME; + +# define B_ASN1_TIME \ + B_ASN1_UTCTIME | \ + B_ASN1_GENERALIZEDTIME + +# define B_ASN1_PRINTABLE \ + B_ASN1_NUMERICSTRING| \ + B_ASN1_PRINTABLESTRING| \ + B_ASN1_T61STRING| \ + B_ASN1_IA5STRING| \ + B_ASN1_BIT_STRING| \ + B_ASN1_UNIVERSALSTRING|\ + B_ASN1_BMPSTRING|\ + B_ASN1_UTF8STRING|\ + B_ASN1_SEQUENCE|\ + B_ASN1_UNKNOWN + +# define B_ASN1_DIRECTORYSTRING \ + B_ASN1_PRINTABLESTRING| \ + B_ASN1_TELETEXSTRING|\ + B_ASN1_BMPSTRING|\ + B_ASN1_UNIVERSALSTRING|\ + B_ASN1_UTF8STRING + +# define B_ASN1_DISPLAYTEXT \ + B_ASN1_IA5STRING| \ + B_ASN1_VISIBLESTRING| \ + B_ASN1_BMPSTRING|\ + B_ASN1_UTF8STRING + +DECLARE_ASN1_ALLOC_FUNCTIONS_name(ASN1_TYPE, ASN1_TYPE) +DECLARE_ASN1_ENCODE_FUNCTIONS(ASN1_TYPE, ASN1_ANY, ASN1_TYPE) + +int ASN1_TYPE_get(const ASN1_TYPE *a); +void ASN1_TYPE_set(ASN1_TYPE *a, int type, void *value); +int ASN1_TYPE_set1(ASN1_TYPE *a, int type, const void *value); +int ASN1_TYPE_cmp(const ASN1_TYPE *a, const ASN1_TYPE *b); + +ASN1_TYPE *ASN1_TYPE_pack_sequence(const ASN1_ITEM *it, void *s, ASN1_TYPE **t); +void *ASN1_TYPE_unpack_sequence(const ASN1_ITEM *it, const ASN1_TYPE *t); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_OBJECT, ASN1_OBJECT, ASN1_OBJECT) +#define sk_ASN1_OBJECT_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_value(sk, idx) ((ASN1_OBJECT *)OPENSSL_sk_value(ossl_check_const_ASN1_OBJECT_sk_type(sk), (idx))) +#define sk_ASN1_OBJECT_new(cmp) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new(ossl_check_ASN1_OBJECT_compfunc_type(cmp))) +#define sk_ASN1_OBJECT_new_null() ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new_null()) +#define sk_ASN1_OBJECT_new_reserve(cmp, n) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_OBJECT_compfunc_type(cmp), (n))) +#define sk_ASN1_OBJECT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_OBJECT_sk_type(sk), (n)) +#define sk_ASN1_OBJECT_free(sk) OPENSSL_sk_free(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_delete(sk, i) ((ASN1_OBJECT *)OPENSSL_sk_delete(ossl_check_ASN1_OBJECT_sk_type(sk), (i))) +#define sk_ASN1_OBJECT_delete_ptr(sk, ptr) ((ASN1_OBJECT *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr))) +#define sk_ASN1_OBJECT_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_pop(sk) ((ASN1_OBJECT *)OPENSSL_sk_pop(ossl_check_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_shift(sk) ((ASN1_OBJECT 
*)OPENSSL_sk_shift(ossl_check_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_OBJECT_sk_type(sk),ossl_check_ASN1_OBJECT_freefunc_type(freefunc)) +#define sk_ASN1_OBJECT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr), (idx)) +#define sk_ASN1_OBJECT_set(sk, idx, ptr) ((ASN1_OBJECT *)OPENSSL_sk_set(ossl_check_ASN1_OBJECT_sk_type(sk), (idx), ossl_check_ASN1_OBJECT_type(ptr))) +#define sk_ASN1_OBJECT_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr), pnum) +#define sk_ASN1_OBJECT_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_dup(sk) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_dup(ossl_check_const_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_copyfunc_type(copyfunc), ossl_check_ASN1_OBJECT_freefunc_type(freefunc))) +#define sk_ASN1_OBJECT_set_cmp_func(sk, cmp) ((sk_ASN1_OBJECT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS(ASN1_OBJECT) + +ASN1_STRING *ASN1_STRING_new(void); +void ASN1_STRING_free(ASN1_STRING *a); +void ASN1_STRING_clear_free(ASN1_STRING *a); +int ASN1_STRING_copy(ASN1_STRING *dst, const ASN1_STRING *str); +DECLARE_ASN1_DUP_FUNCTION(ASN1_STRING) +ASN1_STRING *ASN1_STRING_type_new(int type); +int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b); + /* + * Since this is used to store all sorts of things, via macros, for now, + * make its data void * + */ +int ASN1_STRING_set(ASN1_STRING *str, const void *data, int len); +void ASN1_STRING_set0(ASN1_STRING *str, void *data, int len); +int ASN1_STRING_length(const ASN1_STRING *x); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 void ASN1_STRING_length_set(ASN1_STRING *x, int n); +# endif +int ASN1_STRING_type(const ASN1_STRING *x); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 unsigned char *ASN1_STRING_data(ASN1_STRING *x); +# endif +const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *x); + +DECLARE_ASN1_FUNCTIONS(ASN1_BIT_STRING) +int ASN1_BIT_STRING_set(ASN1_BIT_STRING *a, unsigned char *d, int length); +int ASN1_BIT_STRING_set_bit(ASN1_BIT_STRING *a, int n, int value); +int ASN1_BIT_STRING_get_bit(const ASN1_BIT_STRING *a, int n); +int ASN1_BIT_STRING_check(const ASN1_BIT_STRING *a, + const unsigned char *flags, int flags_len); + +int ASN1_BIT_STRING_name_print(BIO *out, ASN1_BIT_STRING *bs, + BIT_STRING_BITNAME *tbl, int indent); +int ASN1_BIT_STRING_num_asc(const char *name, BIT_STRING_BITNAME *tbl); +int ASN1_BIT_STRING_set_asc(ASN1_BIT_STRING *bs, const char *name, int value, + BIT_STRING_BITNAME *tbl); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_INTEGER, ASN1_INTEGER, ASN1_INTEGER) +#define sk_ASN1_INTEGER_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_value(sk, idx) ((ASN1_INTEGER *)OPENSSL_sk_value(ossl_check_const_ASN1_INTEGER_sk_type(sk), 
(idx))) +#define sk_ASN1_INTEGER_new(cmp) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new(ossl_check_ASN1_INTEGER_compfunc_type(cmp))) +#define sk_ASN1_INTEGER_new_null() ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new_null()) +#define sk_ASN1_INTEGER_new_reserve(cmp, n) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_INTEGER_compfunc_type(cmp), (n))) +#define sk_ASN1_INTEGER_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_INTEGER_sk_type(sk), (n)) +#define sk_ASN1_INTEGER_free(sk) OPENSSL_sk_free(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_delete(sk, i) ((ASN1_INTEGER *)OPENSSL_sk_delete(ossl_check_ASN1_INTEGER_sk_type(sk), (i))) +#define sk_ASN1_INTEGER_delete_ptr(sk, ptr) ((ASN1_INTEGER *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr))) +#define sk_ASN1_INTEGER_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_pop(sk) ((ASN1_INTEGER *)OPENSSL_sk_pop(ossl_check_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_shift(sk) ((ASN1_INTEGER *)OPENSSL_sk_shift(ossl_check_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_INTEGER_sk_type(sk),ossl_check_ASN1_INTEGER_freefunc_type(freefunc)) +#define sk_ASN1_INTEGER_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr), (idx)) +#define sk_ASN1_INTEGER_set(sk, idx, ptr) ((ASN1_INTEGER *)OPENSSL_sk_set(ossl_check_ASN1_INTEGER_sk_type(sk), (idx), ossl_check_ASN1_INTEGER_type(ptr))) +#define sk_ASN1_INTEGER_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr), pnum) +#define sk_ASN1_INTEGER_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_dup(sk) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_dup(ossl_check_const_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_copyfunc_type(copyfunc), ossl_check_ASN1_INTEGER_freefunc_type(freefunc))) +#define sk_ASN1_INTEGER_set_cmp_func(sk, cmp) ((sk_ASN1_INTEGER_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_compfunc_type(cmp))) + + + +DECLARE_ASN1_FUNCTIONS(ASN1_INTEGER) +ASN1_INTEGER *d2i_ASN1_UINTEGER(ASN1_INTEGER **a, const unsigned char **pp, + long length); +DECLARE_ASN1_DUP_FUNCTION(ASN1_INTEGER) +int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y); + +DECLARE_ASN1_FUNCTIONS(ASN1_ENUMERATED) + +int ASN1_UTCTIME_check(const ASN1_UTCTIME *a); +ASN1_UTCTIME *ASN1_UTCTIME_set(ASN1_UTCTIME *s, time_t t); +ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t, + int offset_day, long offset_sec); +int ASN1_UTCTIME_set_string(ASN1_UTCTIME *s, const char *str); +int 
ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t); + +int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *a); +ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_set(ASN1_GENERALIZEDTIME *s, + time_t t); +ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_adj(ASN1_GENERALIZEDTIME *s, + time_t t, int offset_day, + long offset_sec); +int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str); + +int ASN1_TIME_diff(int *pday, int *psec, + const ASN1_TIME *from, const ASN1_TIME *to); + +DECLARE_ASN1_FUNCTIONS(ASN1_OCTET_STRING) +DECLARE_ASN1_DUP_FUNCTION(ASN1_OCTET_STRING) +int ASN1_OCTET_STRING_cmp(const ASN1_OCTET_STRING *a, + const ASN1_OCTET_STRING *b); +int ASN1_OCTET_STRING_set(ASN1_OCTET_STRING *str, const unsigned char *data, + int len); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_UTF8STRING, ASN1_UTF8STRING, ASN1_UTF8STRING) +#define sk_ASN1_UTF8STRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_value(sk, idx) ((ASN1_UTF8STRING *)OPENSSL_sk_value(ossl_check_const_ASN1_UTF8STRING_sk_type(sk), (idx))) +#define sk_ASN1_UTF8STRING_new(cmp) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new(ossl_check_ASN1_UTF8STRING_compfunc_type(cmp))) +#define sk_ASN1_UTF8STRING_new_null() ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new_null()) +#define sk_ASN1_UTF8STRING_new_reserve(cmp, n) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_UTF8STRING_compfunc_type(cmp), (n))) +#define sk_ASN1_UTF8STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_UTF8STRING_sk_type(sk), (n)) +#define sk_ASN1_UTF8STRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_delete(sk, i) ((ASN1_UTF8STRING *)OPENSSL_sk_delete(ossl_check_ASN1_UTF8STRING_sk_type(sk), (i))) +#define sk_ASN1_UTF8STRING_delete_ptr(sk, ptr) ((ASN1_UTF8STRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr))) +#define sk_ASN1_UTF8STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_pop(sk) ((ASN1_UTF8STRING *)OPENSSL_sk_pop(ossl_check_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_shift(sk) ((ASN1_UTF8STRING *)OPENSSL_sk_shift(ossl_check_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_UTF8STRING_sk_type(sk),ossl_check_ASN1_UTF8STRING_freefunc_type(freefunc)) +#define sk_ASN1_UTF8STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr), (idx)) +#define sk_ASN1_UTF8STRING_set(sk, idx, ptr) ((ASN1_UTF8STRING *)OPENSSL_sk_set(ossl_check_ASN1_UTF8STRING_sk_type(sk), (idx), ossl_check_ASN1_UTF8STRING_type(ptr))) +#define sk_ASN1_UTF8STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr), pnum) +#define sk_ASN1_UTF8STRING_sort(sk) 
OPENSSL_sk_sort(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_dup(sk) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_copyfunc_type(copyfunc), ossl_check_ASN1_UTF8STRING_freefunc_type(freefunc))) +#define sk_ASN1_UTF8STRING_set_cmp_func(sk, cmp) ((sk_ASN1_UTF8STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS(ASN1_VISIBLESTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UNIVERSALSTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UTF8STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_NULL) +DECLARE_ASN1_FUNCTIONS(ASN1_BMPSTRING) + +int UTF8_getc(const unsigned char *str, int len, unsigned long *val); +int UTF8_putc(unsigned char *str, int len, unsigned long value); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_GENERALSTRING, ASN1_GENERALSTRING, ASN1_GENERALSTRING) +#define sk_ASN1_GENERALSTRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_value(sk, idx) ((ASN1_GENERALSTRING *)OPENSSL_sk_value(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk), (idx))) +#define sk_ASN1_GENERALSTRING_new(cmp) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new(ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp))) +#define sk_ASN1_GENERALSTRING_new_null() ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new_null()) +#define sk_ASN1_GENERALSTRING_new_reserve(cmp, n) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp), (n))) +#define sk_ASN1_GENERALSTRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (n)) +#define sk_ASN1_GENERALSTRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_delete(sk, i) ((ASN1_GENERALSTRING *)OPENSSL_sk_delete(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (i))) +#define sk_ASN1_GENERALSTRING_delete_ptr(sk, ptr) ((ASN1_GENERALSTRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr))) +#define sk_ASN1_GENERALSTRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_pop(sk) ((ASN1_GENERALSTRING *)OPENSSL_sk_pop(ossl_check_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_shift(sk) ((ASN1_GENERALSTRING *)OPENSSL_sk_shift(ossl_check_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_GENERALSTRING_sk_type(sk),ossl_check_ASN1_GENERALSTRING_freefunc_type(freefunc)) +#define sk_ASN1_GENERALSTRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr), (idx)) +#define sk_ASN1_GENERALSTRING_set(sk, idx, ptr) ((ASN1_GENERALSTRING *)OPENSSL_sk_set(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (idx), ossl_check_ASN1_GENERALSTRING_type(ptr))) +#define 
sk_ASN1_GENERALSTRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr), pnum) +#define sk_ASN1_GENERALSTRING_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_dup(sk) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_copyfunc_type(copyfunc), ossl_check_ASN1_GENERALSTRING_freefunc_type(freefunc))) +#define sk_ASN1_GENERALSTRING_set_cmp_func(sk, cmp) ((sk_ASN1_GENERALSTRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, ASN1_PRINTABLE) + +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, DIRECTORYSTRING) +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, DISPLAYTEXT) +DECLARE_ASN1_FUNCTIONS(ASN1_PRINTABLESTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_T61STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_IA5STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_GENERALSTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UTCTIME) +DECLARE_ASN1_FUNCTIONS(ASN1_GENERALIZEDTIME) +DECLARE_ASN1_FUNCTIONS(ASN1_TIME) + +DECLARE_ASN1_DUP_FUNCTION(ASN1_TIME) +DECLARE_ASN1_DUP_FUNCTION(ASN1_UTCTIME) +DECLARE_ASN1_DUP_FUNCTION(ASN1_GENERALIZEDTIME) + +DECLARE_ASN1_ITEM(ASN1_OCTET_STRING_NDEF) + +ASN1_TIME *ASN1_TIME_set(ASN1_TIME *s, time_t t); +ASN1_TIME *ASN1_TIME_adj(ASN1_TIME *s, time_t t, + int offset_day, long offset_sec); +int ASN1_TIME_check(const ASN1_TIME *t); +ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(const ASN1_TIME *t, + ASN1_GENERALIZEDTIME **out); +int ASN1_TIME_set_string(ASN1_TIME *s, const char *str); +int ASN1_TIME_set_string_X509(ASN1_TIME *s, const char *str); +int ASN1_TIME_to_tm(const ASN1_TIME *s, struct tm *tm); +int ASN1_TIME_normalize(ASN1_TIME *s); +int ASN1_TIME_cmp_time_t(const ASN1_TIME *s, time_t t); +int ASN1_TIME_compare(const ASN1_TIME *a, const ASN1_TIME *b); + +int i2a_ASN1_INTEGER(BIO *bp, const ASN1_INTEGER *a); +int a2i_ASN1_INTEGER(BIO *bp, ASN1_INTEGER *bs, char *buf, int size); +int i2a_ASN1_ENUMERATED(BIO *bp, const ASN1_ENUMERATED *a); +int a2i_ASN1_ENUMERATED(BIO *bp, ASN1_ENUMERATED *bs, char *buf, int size); +int i2a_ASN1_OBJECT(BIO *bp, const ASN1_OBJECT *a); +int a2i_ASN1_STRING(BIO *bp, ASN1_STRING *bs, char *buf, int size); +int i2a_ASN1_STRING(BIO *bp, const ASN1_STRING *a, int type); +int i2t_ASN1_OBJECT(char *buf, int buf_len, const ASN1_OBJECT *a); + +int a2d_ASN1_OBJECT(unsigned char *out, int olen, const char *buf, int num); +ASN1_OBJECT *ASN1_OBJECT_create(int nid, unsigned char *data, int len, + const char *sn, const char *ln); + +int ASN1_INTEGER_get_int64(int64_t *pr, const ASN1_INTEGER *a); +int ASN1_INTEGER_set_int64(ASN1_INTEGER *a, int64_t r); +int ASN1_INTEGER_get_uint64(uint64_t *pr, const ASN1_INTEGER *a); +int ASN1_INTEGER_set_uint64(ASN1_INTEGER *a, uint64_t r); + +int ASN1_INTEGER_set(ASN1_INTEGER *a, long 
v); +long ASN1_INTEGER_get(const ASN1_INTEGER *a); +ASN1_INTEGER *BN_to_ASN1_INTEGER(const BIGNUM *bn, ASN1_INTEGER *ai); +BIGNUM *ASN1_INTEGER_to_BN(const ASN1_INTEGER *ai, BIGNUM *bn); + +int ASN1_ENUMERATED_get_int64(int64_t *pr, const ASN1_ENUMERATED *a); +int ASN1_ENUMERATED_set_int64(ASN1_ENUMERATED *a, int64_t r); + + +int ASN1_ENUMERATED_set(ASN1_ENUMERATED *a, long v); +long ASN1_ENUMERATED_get(const ASN1_ENUMERATED *a); +ASN1_ENUMERATED *BN_to_ASN1_ENUMERATED(const BIGNUM *bn, ASN1_ENUMERATED *ai); +BIGNUM *ASN1_ENUMERATED_to_BN(const ASN1_ENUMERATED *ai, BIGNUM *bn); + +/* General */ +/* given a string, return the correct type, max is the maximum length */ +int ASN1_PRINTABLE_type(const unsigned char *s, int max); + +unsigned long ASN1_tag2bit(int tag); + +/* SPECIALS */ +int ASN1_get_object(const unsigned char **pp, long *plength, int *ptag, + int *pclass, long omax); +int ASN1_check_infinite_end(unsigned char **p, long len); +int ASN1_const_check_infinite_end(const unsigned char **p, long len); +void ASN1_put_object(unsigned char **pp, int constructed, int length, + int tag, int xclass); +int ASN1_put_eoc(unsigned char **pp); +int ASN1_object_size(int constructed, int length, int tag); + +/* Used to implement other functions */ +void *ASN1_dup(i2d_of_void *i2d, d2i_of_void *d2i, const void *x); + +# define ASN1_dup_of(type,i2d,d2i,x) \ + ((type*)ASN1_dup(CHECKED_I2D_OF(type, i2d), \ + CHECKED_D2I_OF(type, d2i), \ + CHECKED_PTR_OF(const type, x))) + +void *ASN1_item_dup(const ASN1_ITEM *it, const void *x); +int ASN1_item_sign_ex(const ASN1_ITEM *it, X509_ALGOR *algor1, + X509_ALGOR *algor2, ASN1_BIT_STRING *signature, + const void *data, const ASN1_OCTET_STRING *id, + EVP_PKEY *pkey, const EVP_MD *md, OSSL_LIB_CTX *libctx, + const char *propq); +int ASN1_item_verify_ex(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + const ASN1_OCTET_STRING *id, EVP_PKEY *pkey, + OSSL_LIB_CTX *libctx, const char *propq); + +/* ASN1 alloc/free macros for when a type is only used internally */ + +# define M_ASN1_new_of(type) (type *)ASN1_item_new(ASN1_ITEM_rptr(type)) +# define M_ASN1_free_of(x, type) \ + ASN1_item_free(CHECKED_PTR_OF(type, x), ASN1_ITEM_rptr(type)) + +# ifndef OPENSSL_NO_STDIO +void *ASN1_d2i_fp(void *(*xnew) (void), d2i_of_void *d2i, FILE *in, void **x); + +# define ASN1_d2i_fp_of(type,xnew,d2i,in,x) \ + ((type*)ASN1_d2i_fp(CHECKED_NEW_OF(type, xnew), \ + CHECKED_D2I_OF(type, d2i), \ + in, \ + CHECKED_PPTR_OF(type, x))) + +void *ASN1_item_d2i_fp_ex(const ASN1_ITEM *it, FILE *in, void *x, + OSSL_LIB_CTX *libctx, const char *propq); +void *ASN1_item_d2i_fp(const ASN1_ITEM *it, FILE *in, void *x); +int ASN1_i2d_fp(i2d_of_void *i2d, FILE *out, const void *x); + +# define ASN1_i2d_fp_of(type,i2d,out,x) \ + (ASN1_i2d_fp(CHECKED_I2D_OF(type, i2d), \ + out, \ + CHECKED_PTR_OF(const type, x))) + +int ASN1_item_i2d_fp(const ASN1_ITEM *it, FILE *out, const void *x); +int ASN1_STRING_print_ex_fp(FILE *fp, const ASN1_STRING *str, unsigned long flags); +# endif + +int ASN1_STRING_to_UTF8(unsigned char **out, const ASN1_STRING *in); + +void *ASN1_d2i_bio(void *(*xnew) (void), d2i_of_void *d2i, BIO *in, void **x); + +# define ASN1_d2i_bio_of(type,xnew,d2i,in,x) \ + ((type*)ASN1_d2i_bio( CHECKED_NEW_OF(type, xnew), \ + CHECKED_D2I_OF(type, d2i), \ + in, \ + CHECKED_PPTR_OF(type, x))) + +void *ASN1_item_d2i_bio_ex(const ASN1_ITEM *it, BIO *in, void *pval, + OSSL_LIB_CTX *libctx, const char *propq); +void *ASN1_item_d2i_bio(const ASN1_ITEM *it, 
BIO *in, void *pval); +int ASN1_i2d_bio(i2d_of_void *i2d, BIO *out, const void *x); + +# define ASN1_i2d_bio_of(type,i2d,out,x) \ + (ASN1_i2d_bio(CHECKED_I2D_OF(type, i2d), \ + out, \ + CHECKED_PTR_OF(const type, x))) + +int ASN1_item_i2d_bio(const ASN1_ITEM *it, BIO *out, const void *x); +BIO *ASN1_item_i2d_mem_bio(const ASN1_ITEM *it, const ASN1_VALUE *val); +int ASN1_UTCTIME_print(BIO *fp, const ASN1_UTCTIME *a); +int ASN1_GENERALIZEDTIME_print(BIO *fp, const ASN1_GENERALIZEDTIME *a); +int ASN1_TIME_print(BIO *bp, const ASN1_TIME *tm); +int ASN1_TIME_print_ex(BIO *bp, const ASN1_TIME *tm, unsigned long flags); +int ASN1_STRING_print(BIO *bp, const ASN1_STRING *v); +int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long flags); +int ASN1_buf_print(BIO *bp, const unsigned char *buf, size_t buflen, int off); +int ASN1_bn_print(BIO *bp, const char *number, const BIGNUM *num, + unsigned char *buf, int off); +int ASN1_parse(BIO *bp, const unsigned char *pp, long len, int indent); +int ASN1_parse_dump(BIO *bp, const unsigned char *pp, long len, int indent, + int dump); +const char *ASN1_tag2str(int tag); + +/* Used to load and write Netscape format cert */ + +int ASN1_UNIVERSALSTRING_to_string(ASN1_UNIVERSALSTRING *s); + +int ASN1_TYPE_set_octetstring(ASN1_TYPE *a, unsigned char *data, int len); +int ASN1_TYPE_get_octetstring(const ASN1_TYPE *a, unsigned char *data, int max_len); +int ASN1_TYPE_set_int_octetstring(ASN1_TYPE *a, long num, + unsigned char *data, int len); +int ASN1_TYPE_get_int_octetstring(const ASN1_TYPE *a, long *num, + unsigned char *data, int max_len); + +void *ASN1_item_unpack(const ASN1_STRING *oct, const ASN1_ITEM *it); +void *ASN1_item_unpack_ex(const ASN1_STRING *oct, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); + +ASN1_STRING *ASN1_item_pack(void *obj, const ASN1_ITEM *it, + ASN1_OCTET_STRING **oct); + +void ASN1_STRING_set_default_mask(unsigned long mask); +int ASN1_STRING_set_default_mask_asc(const char *p); +unsigned long ASN1_STRING_get_default_mask(void); +int ASN1_mbstring_copy(ASN1_STRING **out, const unsigned char *in, int len, + int inform, unsigned long mask); +int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, + int inform, unsigned long mask, + long minsize, long maxsize); + +ASN1_STRING *ASN1_STRING_set_by_NID(ASN1_STRING **out, + const unsigned char *in, int inlen, + int inform, int nid); +ASN1_STRING_TABLE *ASN1_STRING_TABLE_get(int nid); +int ASN1_STRING_TABLE_add(int, long, long, unsigned long, unsigned long); +void ASN1_STRING_TABLE_cleanup(void); + +/* ASN1 template functions */ + +/* Old API compatible functions */ +ASN1_VALUE *ASN1_item_new(const ASN1_ITEM *it); +ASN1_VALUE *ASN1_item_new_ex(const ASN1_ITEM *it, OSSL_LIB_CTX *libctx, + const char *propq); +void ASN1_item_free(ASN1_VALUE *val, const ASN1_ITEM *it); +ASN1_VALUE *ASN1_item_d2i_ex(ASN1_VALUE **val, const unsigned char **in, + long len, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +ASN1_VALUE *ASN1_item_d2i(ASN1_VALUE **val, const unsigned char **in, + long len, const ASN1_ITEM *it); +int ASN1_item_i2d(const ASN1_VALUE *val, unsigned char **out, const ASN1_ITEM *it); +int ASN1_item_ndef_i2d(const ASN1_VALUE *val, unsigned char **out, + const ASN1_ITEM *it); + +void ASN1_add_oid_module(void); +void ASN1_add_stable_module(void); + +ASN1_TYPE *ASN1_generate_nconf(const char *str, CONF *nconf); +ASN1_TYPE *ASN1_generate_v3(const char *str, X509V3_CTX *cnf); +int ASN1_str2mask(const char *str, unsigned long 
*pmask); + +/* ASN1 Print flags */ + +/* Indicate missing OPTIONAL fields */ +# define ASN1_PCTX_FLAGS_SHOW_ABSENT 0x001 +/* Mark start and end of SEQUENCE */ +# define ASN1_PCTX_FLAGS_SHOW_SEQUENCE 0x002 +/* Mark start and end of SEQUENCE/SET OF */ +# define ASN1_PCTX_FLAGS_SHOW_SSOF 0x004 +/* Show the ASN1 type of primitives */ +# define ASN1_PCTX_FLAGS_SHOW_TYPE 0x008 +/* Don't show ASN1 type of ANY */ +# define ASN1_PCTX_FLAGS_NO_ANY_TYPE 0x010 +/* Don't show ASN1 type of MSTRINGs */ +# define ASN1_PCTX_FLAGS_NO_MSTRING_TYPE 0x020 +/* Don't show field names in SEQUENCE */ +# define ASN1_PCTX_FLAGS_NO_FIELD_NAME 0x040 +/* Show structure names of each SEQUENCE field */ +# define ASN1_PCTX_FLAGS_SHOW_FIELD_STRUCT_NAME 0x080 +/* Don't show structure name even at top level */ +# define ASN1_PCTX_FLAGS_NO_STRUCT_NAME 0x100 + +int ASN1_item_print(BIO *out, const ASN1_VALUE *ifld, int indent, + const ASN1_ITEM *it, const ASN1_PCTX *pctx); +ASN1_PCTX *ASN1_PCTX_new(void); +void ASN1_PCTX_free(ASN1_PCTX *p); +unsigned long ASN1_PCTX_get_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_nm_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_nm_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_cert_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_cert_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_oid_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_oid_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_str_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_str_flags(ASN1_PCTX *p, unsigned long flags); + +ASN1_SCTX *ASN1_SCTX_new(int (*scan_cb) (ASN1_SCTX *ctx)); +void ASN1_SCTX_free(ASN1_SCTX *p); +const ASN1_ITEM *ASN1_SCTX_get_item(ASN1_SCTX *p); +const ASN1_TEMPLATE *ASN1_SCTX_get_template(ASN1_SCTX *p); +unsigned long ASN1_SCTX_get_flags(ASN1_SCTX *p); +void ASN1_SCTX_set_app_data(ASN1_SCTX *p, void *data); +void *ASN1_SCTX_get_app_data(ASN1_SCTX *p); + +const BIO_METHOD *BIO_f_asn1(void); + +/* cannot constify val because of CMS_stream() */ +BIO *BIO_new_NDEF(BIO *out, ASN1_VALUE *val, const ASN1_ITEM *it); + +int i2d_ASN1_bio_stream(BIO *out, ASN1_VALUE *val, BIO *in, int flags, + const ASN1_ITEM *it); +int PEM_write_bio_ASN1_stream(BIO *out, ASN1_VALUE *val, BIO *in, int flags, + const char *hdr, const ASN1_ITEM *it); +/* cannot constify val because of CMS_dataFinal() */ +int SMIME_write_ASN1(BIO *bio, ASN1_VALUE *val, BIO *data, int flags, + int ctype_nid, int econt_nid, + STACK_OF(X509_ALGOR) *mdalgs, const ASN1_ITEM *it); +int SMIME_write_ASN1_ex(BIO *bio, ASN1_VALUE *val, BIO *data, int flags, + int ctype_nid, int econt_nid, + STACK_OF(X509_ALGOR) *mdalgs, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +ASN1_VALUE *SMIME_read_ASN1(BIO *bio, BIO **bcont, const ASN1_ITEM *it); +ASN1_VALUE *SMIME_read_ASN1_ex(BIO *bio, int flags, BIO **bcont, + const ASN1_ITEM *it, ASN1_VALUE **x, + OSSL_LIB_CTX *libctx, const char *propq); +int SMIME_crlf_copy(BIO *in, BIO *out, int flags); +int SMIME_text(BIO *in, BIO *out); + +const ASN1_ITEM *ASN1_ITEM_lookup(const char *name); +const ASN1_ITEM *ASN1_ITEM_get(size_t i); + +/* Legacy compatibility */ +# define DECLARE_ASN1_FUNCTIONS_fname(type, itname, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name) +# define DECLARE_ASN1_FUNCTIONS_const(type) DECLARE_ASN1_FUNCTIONS(type) +# define DECLARE_ASN1_ENCODE_FUNCTIONS_const(type, name) \ + 
DECLARE_ASN1_ENCODE_FUNCTIONS(type, name)
+# define I2D_OF_const(type) I2D_OF(type)
+# define ASN1_dup_of_const(type,i2d,d2i,x) ASN1_dup_of(type,i2d,d2i,x)
+# define ASN1_i2d_fp_of_const(type,i2d,out,x) ASN1_i2d_fp_of(type,i2d,out,x)
+# define ASN1_i2d_bio_of_const(type,i2d,out,x) ASN1_i2d_bio_of(type,i2d,out,x)
+
+# ifdef __cplusplus
+}
+# endif
+#endif
diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1t.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1t.h
new file mode 100644
index 00000000000..74ba47d0cf2
--- /dev/null
+++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/asn1t.h
@@ -0,0 +1,946 @@
+/*
+ * WARNING: do not edit!
+ * Generated by Makefile from include/openssl/asn1t.h.in
+ *
+ * Copyright 2000-2021 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+
+
+#ifndef OPENSSL_ASN1T_H
+# define OPENSSL_ASN1T_H
+# pragma once
+
+# include <openssl/macros.h>
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+# define HEADER_ASN1T_H
+# endif
+
+# include <stddef.h>
+# include <openssl/opensslconf.h>
+# include <openssl/asn1.h>
+
+# ifdef OPENSSL_BUILD_SHLIBCRYPTO
+# undef OPENSSL_EXTERN
+# define OPENSSL_EXTERN OPENSSL_EXPORT
+# endif
+
+/* ASN1 template defines, structures and functions */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*-
+ * These are the possible values for the itype field of the
+ * ASN1_ITEM structure and determine how it is interpreted.
+ *
+ * For PRIMITIVE types the underlying type
+ * determines the behaviour if items is NULL.
+ *
+ * Otherwise templates must contain a single
+ * template and the type is treated in the
+ * same way as the type specified in the template.
+ *
+ * For SEQUENCE types the templates field points
+ * to the members, the size field is the
+ * structure size.
+ *
+ * For CHOICE types the templates field points
+ * to each possible member (typically a union)
+ * and the 'size' field is the offset of the
+ * selector.
+ *
+ * The 'funcs' field is used for application-specific
+ * data and functions.
+ *
+ * The EXTERN type uses a new style d2i/i2d.
+ * The new style should be used where possible
+ * because it avoids things like the d2i IMPLICIT
+ * hack.
+ *
+ * MSTRING is a multiple string type, it is used
+ * for a CHOICE of character strings where the
+ * actual strings all occupy an ASN1_STRING
+ * structure. In this case the 'utype' field
+ * has a special meaning, it is used as a mask
+ * of acceptable types using the B_ASN1 constants.
+ *
+ * NDEF_SEQUENCE is the same as SEQUENCE except
+ * that it will use indefinite length constructed
+ * encoding if requested.
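+ *
+ * As a brief illustration (MYSTRING is a hypothetical name, not part
+ * of this header), an MSTRING item is created with the
+ * IMPLEMENT_ASN1_MSTRING macro defined later in this file:
+ *
+ *     IMPLEMENT_ASN1_MSTRING(MYSTRING, B_ASN1_DIRECTORYSTRING)
+ *
+ * where the mask selects the acceptable universal string types.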
+ *
+ */
+
+# define ASN1_ITYPE_PRIMITIVE 0x0
+# define ASN1_ITYPE_SEQUENCE 0x1
+# define ASN1_ITYPE_CHOICE 0x2
+/* unused value 0x3 */
+# define ASN1_ITYPE_EXTERN 0x4
+# define ASN1_ITYPE_MSTRING 0x5
+# define ASN1_ITYPE_NDEF_SEQUENCE 0x6
+
+/* Macro to obtain ASN1_ADB pointer from a type (only used internally) */
+# define ASN1_ADB_ptr(iptr) ((const ASN1_ADB *)((iptr)()))
+
+/* Macros for start and end of ASN1_ITEM definition */
+
+# define ASN1_ITEM_start(itname) \
+ const ASN1_ITEM * itname##_it(void) \
+ { \
+ static const ASN1_ITEM local_it = {
+
+# define static_ASN1_ITEM_start(itname) \
+ static ASN1_ITEM_start(itname)
+
+# define ASN1_ITEM_end(itname) \
+ }; \
+ return &local_it; \
+ }
+
+/* Macros to aid ASN1 template writing */
+
+# define ASN1_ITEM_TEMPLATE(tname) \
+ static const ASN1_TEMPLATE tname##_item_tt
+
+# define ASN1_ITEM_TEMPLATE_END(tname) \
+ ;\
+ ASN1_ITEM_start(tname) \
+ ASN1_ITYPE_PRIMITIVE,\
+ -1,\
+ &tname##_item_tt,\
+ 0,\
+ NULL,\
+ 0,\
+ #tname \
+ ASN1_ITEM_end(tname)
+# define static_ASN1_ITEM_TEMPLATE_END(tname) \
+ ;\
+ static_ASN1_ITEM_start(tname) \
+ ASN1_ITYPE_PRIMITIVE,\
+ -1,\
+ &tname##_item_tt,\
+ 0,\
+ NULL,\
+ 0,\
+ #tname \
+ ASN1_ITEM_end(tname)
+
+/* This is an ASN1 type which just embeds a template */
+
+/*-
+ * This pair helps declare a SEQUENCE. We can do:
+ *
+ * ASN1_SEQUENCE(stname) = {
+ * ... SEQUENCE components ...
+ * } ASN1_SEQUENCE_END(stname)
+ *
+ * This will produce an ASN1_ITEM called stname_it
+ * for a structure called stname.
+ *
+ * If you want the same structure but a different
+ * name then use:
+ *
+ * ASN1_SEQUENCE(itname) = {
+ * ... SEQUENCE components ...
+ * } ASN1_SEQUENCE_END_name(stname, itname)
+ *
+ * This will create an item called itname_it using
+ * a structure called stname.
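+ *
+ * A minimal sketch, using the component macros defined later in this
+ * header (MYSEQ is a hypothetical type, not part of OpenSSL):
+ *
+ *   typedef struct {
+ *       ASN1_INTEGER *version;
+ *       ASN1_OCTET_STRING *data;
+ *   } MYSEQ;
+ *
+ *   ASN1_SEQUENCE(MYSEQ) = {
+ *       ASN1_SIMPLE(MYSEQ, version, ASN1_INTEGER),
+ *       ASN1_SIMPLE(MYSEQ, data, ASN1_OCTET_STRING)
+ *   } ASN1_SEQUENCE_END(MYSEQ)
+ *
+ * after which IMPLEMENT_ASN1_FUNCTIONS(MYSEQ) generates MYSEQ_new(),
+ * MYSEQ_free(), d2i_MYSEQ() and i2d_MYSEQ().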
+ */ + +# define ASN1_SEQUENCE(tname) \ + static const ASN1_TEMPLATE tname##_seq_tt[] + +# define ASN1_SEQUENCE_END(stname) ASN1_SEQUENCE_END_name(stname, stname) + +# define static_ASN1_SEQUENCE_END(stname) static_ASN1_SEQUENCE_END_name(stname, stname) + +# define ASN1_SEQUENCE_END_name(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #tname \ + ASN1_ITEM_end(tname) + +# define static_ASN1_SEQUENCE_END_name(stname, tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_NDEF_SEQUENCE(tname) \ + ASN1_SEQUENCE(tname) + +# define ASN1_NDEF_SEQUENCE_cb(tname, cb) \ + ASN1_SEQUENCE_cb(tname, cb) + +# define ASN1_SEQUENCE_cb(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, 0, 0, 0, cb, 0, NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_const_cb(tname, const_cb) \ + static const ASN1_AUX tname##_aux = \ + {NULL, ASN1_AFLG_CONST_CB, 0, 0, NULL, 0, const_cb}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_cb_const_cb(tname, cb, const_cb) \ + static const ASN1_AUX tname##_aux = \ + {NULL, ASN1_AFLG_CONST_CB, 0, 0, cb, 0, const_cb}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_ref(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_REFCOUNT, offsetof(tname, references), offsetof(tname, lock), cb, 0, NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_enc(tname, enc, cb) \ + static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_ENCODING, 0, 0, cb, offsetof(tname, enc), NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_NDEF_SEQUENCE_END(tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(tname),\ + #tname \ + ASN1_ITEM_end(tname) +# define static_ASN1_NDEF_SEQUENCE_END(tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(tname),\ + #tname \ + ASN1_ITEM_end(tname) + + +# define ASN1_SEQUENCE_END_enc(stname, tname) ASN1_SEQUENCE_END_ref(stname, tname) + +# define ASN1_SEQUENCE_END_cb(stname, tname) ASN1_SEQUENCE_END_ref(stname, tname) +# define static_ASN1_SEQUENCE_END_cb(stname, tname) static_ASN1_SEQUENCE_END_ref(stname, tname) + +# define ASN1_SEQUENCE_END_ref(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #tname \ + ASN1_ITEM_end(tname) +# define static_ASN1_SEQUENCE_END_ref(stname, tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_NDEF_SEQUENCE_END_cb(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +/*- + * This pair helps declare a CHOICE type. We can do: + * + * ASN1_CHOICE(chname) = { + * ... CHOICE options ... 
+ * ASN1_CHOICE_END(chname) + * + * This will produce an ASN1_ITEM called chname_it + * for a structure called chname. The structure + * definition must look like this: + * typedef struct { + * int type; + * union { + * ASN1_SOMETHING *opt1; + * ASN1_SOMEOTHER *opt2; + * } value; + * } chname; + * + * the name of the selector must be 'type'. + * to use an alternative selector name use the + * ASN1_CHOICE_END_selector() version. + */ + +# define ASN1_CHOICE(tname) \ + static const ASN1_TEMPLATE tname##_ch_tt[] + +# define ASN1_CHOICE_cb(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, 0, 0, 0, cb, 0, NULL}; \ + ASN1_CHOICE(tname) + +# define ASN1_CHOICE_END(stname) ASN1_CHOICE_END_name(stname, stname) + +# define static_ASN1_CHOICE_END(stname) static_ASN1_CHOICE_END_name(stname, stname) + +# define ASN1_CHOICE_END_name(stname, tname) ASN1_CHOICE_END_selector(stname, tname, type) + +# define static_ASN1_CHOICE_END_name(stname, tname) static_ASN1_CHOICE_END_selector(stname, tname, type) + +# define ASN1_CHOICE_END_selector(stname, tname, selname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define static_ASN1_CHOICE_END_selector(stname, tname, selname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_CHOICE_END_cb(stname, tname, selname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +/* This helps with the template wrapper form of ASN1_ITEM */ + +# define ASN1_EX_TEMPLATE_TYPE(flags, tag, name, type) { \ + (flags), (tag), 0,\ + #name, ASN1_ITEM_ref(type) } + +/* These help with SEQUENCE or CHOICE components */ + +/* used to declare other types */ + +# define ASN1_EX_TYPE(flags, tag, stname, field, type) { \ + (flags), (tag), offsetof(stname, field),\ + #field, ASN1_ITEM_ref(type) } + +/* implicit and explicit helper macros */ + +# define ASN1_IMP_EX(stname, field, type, tag, ex) \ + ASN1_EX_TYPE(ASN1_TFLG_IMPLICIT | (ex), tag, stname, field, type) + +# define ASN1_EXP_EX(stname, field, type, tag, ex) \ + ASN1_EX_TYPE(ASN1_TFLG_EXPLICIT | (ex), tag, stname, field, type) + +/* Any defined by macros: the field used is in the table itself */ + +# define ASN1_ADB_OBJECT(tblname) { ASN1_TFLG_ADB_OID, -1, 0, #tblname, tblname##_adb } +# define ASN1_ADB_INTEGER(tblname) { ASN1_TFLG_ADB_INT, -1, 0, #tblname, tblname##_adb } + +/* Plain simple type */ +# define ASN1_SIMPLE(stname, field, type) ASN1_EX_TYPE(0,0, stname, field, type) +/* Embedded simple type */ +# define ASN1_EMBED(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_EMBED,0, stname, field, type) + +/* OPTIONAL simple type */ +# define ASN1_OPT(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_OPTIONAL, 0, stname, field, type) +# define ASN1_OPT_EMBED(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED, 0, stname, field, type) + +/* IMPLICIT tagged simple type */ +# define ASN1_IMP(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, 0) +# define ASN1_IMP_EMBED(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_EMBED) + +/* IMPLICIT tagged OPTIONAL 
simple type */ +# define ASN1_IMP_OPT(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL) +# define ASN1_IMP_OPT_EMBED(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED) + +/* Same as above but EXPLICIT */ + +# define ASN1_EXP(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, 0) +# define ASN1_EXP_EMBED(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_EMBED) +# define ASN1_EXP_OPT(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL) +# define ASN1_EXP_OPT_EMBED(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED) + +/* SEQUENCE OF type */ +# define ASN1_SEQUENCE_OF(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SEQUENCE_OF, 0, stname, field, type) + +/* OPTIONAL SEQUENCE OF */ +# define ASN1_SEQUENCE_OF_OPT(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL, 0, stname, field, type) + +/* Same as above but for SET OF */ + +# define ASN1_SET_OF(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SET_OF, 0, stname, field, type) + +# define ASN1_SET_OF_OPT(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL, 0, stname, field, type) + +/* Finally compound types of SEQUENCE, SET, IMPLICIT, EXPLICIT and OPTIONAL */ + +# define ASN1_IMP_SET_OF(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF) + +# define ASN1_EXP_SET_OF(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF) + +# define ASN1_IMP_SET_OF_OPT(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_EXP_SET_OF_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_IMP_SEQUENCE_OF(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF) + +# define ASN1_IMP_SEQUENCE_OF_OPT(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_EXP_SEQUENCE_OF(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF) + +# define ASN1_EXP_SEQUENCE_OF_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL) + +/* EXPLICIT using indefinite length constructed form */ +# define ASN1_NDEF_EXP(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_NDEF) + +/* EXPLICIT OPTIONAL using indefinite length constructed form */ +# define ASN1_NDEF_EXP_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_NDEF) + +/* Macros for the ASN1_ADB structure */ + +# define ASN1_ADB(name) \ + static const ASN1_ADB_TABLE name##_adbtbl[] + +# define ASN1_ADB_END(name, flags, field, adb_cb, def, none) \ + ;\ + static const ASN1_ITEM *name##_adb(void) \ + { \ + static const ASN1_ADB internal_adb = \ + {\ + flags,\ + offsetof(name, field),\ + adb_cb,\ + name##_adbtbl,\ + sizeof(name##_adbtbl) / sizeof(ASN1_ADB_TABLE),\ + def,\ + none\ + }; \ + return (const ASN1_ITEM *) &internal_adb; \ + } \ + void dummy_function(void) + +# define ADB_ENTRY(val, template) {val, template} + +# define ASN1_ADB_TEMPLATE(name) \ + static const ASN1_TEMPLATE name##_tt + +/* + * This is the ASN1 template structure that defines a wrapper round the + * actual type. 
It determines the actual position of the field in the value + * structure, various flags such as OPTIONAL and the field name. + */ + +struct ASN1_TEMPLATE_st { + unsigned long flags; /* Various flags */ + long tag; /* tag, not used if no tagging */ + unsigned long offset; /* Offset of this field in structure */ + const char *field_name; /* Field name */ + ASN1_ITEM_EXP *item; /* Relevant ASN1_ITEM or ASN1_ADB */ +}; + +/* Macro to extract ASN1_ITEM and ASN1_ADB pointer from ASN1_TEMPLATE */ + +# define ASN1_TEMPLATE_item(t) (t->item_ptr) +# define ASN1_TEMPLATE_adb(t) (t->item_ptr) + +typedef struct ASN1_ADB_TABLE_st ASN1_ADB_TABLE; +typedef struct ASN1_ADB_st ASN1_ADB; + +struct ASN1_ADB_st { + unsigned long flags; /* Various flags */ + unsigned long offset; /* Offset of selector field */ + int (*adb_cb)(long *psel); /* Application callback */ + const ASN1_ADB_TABLE *tbl; /* Table of possible types */ + long tblcount; /* Number of entries in tbl */ + const ASN1_TEMPLATE *default_tt; /* Type to use if no match */ + const ASN1_TEMPLATE *null_tt; /* Type to use if selector is NULL */ +}; + +struct ASN1_ADB_TABLE_st { + long value; /* NID for an object or value for an int */ + const ASN1_TEMPLATE tt; /* item for this value */ +}; + +/* template flags */ + +/* Field is optional */ +# define ASN1_TFLG_OPTIONAL (0x1) + +/* Field is a SET OF */ +# define ASN1_TFLG_SET_OF (0x1 << 1) + +/* Field is a SEQUENCE OF */ +# define ASN1_TFLG_SEQUENCE_OF (0x2 << 1) + +/* + * Special case: this refers to a SET OF that will be sorted into DER order + * when encoded *and* the corresponding STACK will be modified to match the + * new order. + */ +# define ASN1_TFLG_SET_ORDER (0x3 << 1) + +/* Mask for SET OF or SEQUENCE OF */ +# define ASN1_TFLG_SK_MASK (0x3 << 1) + +/* + * These flags mean the tag should be taken from the tag field. If EXPLICIT + * then the underlying type is used for the inner tag. + */ + +/* IMPLICIT tagging */ +# define ASN1_TFLG_IMPTAG (0x1 << 3) + +/* EXPLICIT tagging, inner tag from underlying type */ +# define ASN1_TFLG_EXPTAG (0x2 << 3) + +# define ASN1_TFLG_TAG_MASK (0x3 << 3) + +/* context specific IMPLICIT */ +# define ASN1_TFLG_IMPLICIT (ASN1_TFLG_IMPTAG|ASN1_TFLG_CONTEXT) + +/* context specific EXPLICIT */ +# define ASN1_TFLG_EXPLICIT (ASN1_TFLG_EXPTAG|ASN1_TFLG_CONTEXT) + +/* + * If tagging is in force these determine the type of tag to use. Otherwise + * the tag is determined by the underlying type. These values reflect the + * actual octet format. + */ + +/* Universal tag */ +# define ASN1_TFLG_UNIVERSAL (0x0<<6) +/* Application tag */ +# define ASN1_TFLG_APPLICATION (0x1<<6) +/* Context specific tag */ +# define ASN1_TFLG_CONTEXT (0x2<<6) +/* Private tag */ +# define ASN1_TFLG_PRIVATE (0x3<<6) + +# define ASN1_TFLG_TAG_CLASS (0x3<<6) + +/* + * These are for ANY DEFINED BY type. In this case the 'item' field points to + * an ASN1_ADB structure which contains a table of values to decode the + * relevant type + */ + +# define ASN1_TFLG_ADB_MASK (0x3<<8) + +# define ASN1_TFLG_ADB_OID (0x1<<8) + +# define ASN1_TFLG_ADB_INT (0x1<<9) + +/* + * This flag when present in a SEQUENCE OF, SET OF or EXPLICIT causes + * indefinite length constructed encoding to be used if required. 
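+ *
+ * As a brief illustration (MYSEQ and `content` are hypothetical names,
+ * not part of this header), a field declared with
+ *     ASN1_NDEF_EXP(MYSEQ, content, ASN1_OCTET_STRING, 0)
+ * carries this flag and may be encoded using the indefinite length
+ * form when streaming output is requested.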
+ */ + +# define ASN1_TFLG_NDEF (0x1<<11) + +/* Field is embedded and not a pointer */ +# define ASN1_TFLG_EMBED (0x1 << 12) + +/* This is the actual ASN1 item itself */ + +struct ASN1_ITEM_st { + char itype; /* The item type, primitive, SEQUENCE, CHOICE + * or extern */ + long utype; /* underlying type */ + const ASN1_TEMPLATE *templates; /* If SEQUENCE or CHOICE this contains + * the contents */ + long tcount; /* Number of templates if SEQUENCE or CHOICE */ + const void *funcs; /* further data and type-specific functions */ + /* funcs can be ASN1_PRIMITIVE_FUNCS*, ASN1_EXTERN_FUNCS*, or ASN1_AUX* */ + long size; /* Structure size (usually) */ + const char *sname; /* Structure name */ +}; + +/* + * Cache for ASN1 tag and length, so we don't keep re-reading it for things + * like CHOICE + */ + +struct ASN1_TLC_st { + char valid; /* Values below are valid */ + int ret; /* return value */ + long plen; /* length */ + int ptag; /* class value */ + int pclass; /* class value */ + int hdrlen; /* header length */ +}; + +/* Typedefs for ASN1 function pointers */ +typedef int ASN1_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx); + +typedef int ASN1_ex_d2i_ex(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx, OSSL_LIB_CTX *libctx, + const char *propq); +typedef int ASN1_ex_i2d(const ASN1_VALUE **pval, unsigned char **out, + const ASN1_ITEM *it, int tag, int aclass); +typedef int ASN1_ex_new_func(ASN1_VALUE **pval, const ASN1_ITEM *it); +typedef int ASN1_ex_new_ex_func(ASN1_VALUE **pval, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +typedef void ASN1_ex_free_func(ASN1_VALUE **pval, const ASN1_ITEM *it); + +typedef int ASN1_ex_print_func(BIO *out, const ASN1_VALUE **pval, + int indent, const char *fname, + const ASN1_PCTX *pctx); + +typedef int ASN1_primitive_i2c(const ASN1_VALUE **pval, unsigned char *cont, + int *putype, const ASN1_ITEM *it); +typedef int ASN1_primitive_c2i(ASN1_VALUE **pval, const unsigned char *cont, + int len, int utype, char *free_cont, + const ASN1_ITEM *it); +typedef int ASN1_primitive_print(BIO *out, const ASN1_VALUE **pval, + const ASN1_ITEM *it, int indent, + const ASN1_PCTX *pctx); + +typedef struct ASN1_EXTERN_FUNCS_st { + void *app_data; + ASN1_ex_new_func *asn1_ex_new; + ASN1_ex_free_func *asn1_ex_free; + ASN1_ex_free_func *asn1_ex_clear; + ASN1_ex_d2i *asn1_ex_d2i; + ASN1_ex_i2d *asn1_ex_i2d; + ASN1_ex_print_func *asn1_ex_print; + ASN1_ex_new_ex_func *asn1_ex_new_ex; + ASN1_ex_d2i_ex *asn1_ex_d2i_ex; +} ASN1_EXTERN_FUNCS; + +typedef struct ASN1_PRIMITIVE_FUNCS_st { + void *app_data; + unsigned long flags; + ASN1_ex_new_func *prim_new; + ASN1_ex_free_func *prim_free; + ASN1_ex_free_func *prim_clear; + ASN1_primitive_c2i *prim_c2i; + ASN1_primitive_i2c *prim_i2c; + ASN1_primitive_print *prim_print; +} ASN1_PRIMITIVE_FUNCS; + +/* + * This is the ASN1_AUX structure: it handles various miscellaneous + * requirements. For example the use of reference counts and an informational + * callback. The "informational callback" is called at various points during + * the ASN1 encoding and decoding. It can be used to provide minor + * customisation of the structures used. This is most useful where the + * supplied routines *almost* do the right thing but need some extra help at + * a few points. 
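+ * A minimal informational callback might look like this sketch (MYTYPE is
+ * hypothetical; operations it does not care about simply report success):
+ *
+ *     static int mytype_cb(int operation, ASN1_VALUE **pval,
+ *                          const ASN1_ITEM *it, void *exarg)
+ *     {
+ *         if (operation == ASN1_OP_NEW_POST) {
+ *             ...initialise application defaults on the new structure...
+ *         }
+ *         return 1;
+ *     }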
If the callback returns zero then it is assumed a fatal + * error has occurred and the main operation should be abandoned. If major + * changes in the default behaviour are required then an external type is + * more appropriate. + * For the operations ASN1_OP_I2D_PRE, ASN1_OP_I2D_POST, ASN1_OP_PRINT_PRE, and + * ASN1_OP_PRINT_POST, meanwhile a variant of the callback with const parameter + * 'in' is provided to make clear statically that its input is not modified. If + * and only if this variant is in use the flag ASN1_AFLG_CONST_CB must be set. + */ + +typedef int ASN1_aux_cb(int operation, ASN1_VALUE **in, const ASN1_ITEM *it, + void *exarg); +typedef int ASN1_aux_const_cb(int operation, const ASN1_VALUE **in, + const ASN1_ITEM *it, void *exarg); + +typedef struct ASN1_AUX_st { + void *app_data; + int flags; + int ref_offset; /* Offset of reference value */ + int ref_lock; /* Offset of lock value */ + ASN1_aux_cb *asn1_cb; + int enc_offset; /* Offset of ASN1_ENCODING structure */ + ASN1_aux_const_cb *asn1_const_cb; /* for ASN1_OP_I2D_ and ASN1_OP_PRINT_ */ +} ASN1_AUX; + +/* For print related callbacks exarg points to this structure */ +typedef struct ASN1_PRINT_ARG_st { + BIO *out; + int indent; + const ASN1_PCTX *pctx; +} ASN1_PRINT_ARG; + +/* For streaming related callbacks exarg points to this structure */ +typedef struct ASN1_STREAM_ARG_st { + /* BIO to stream through */ + BIO *out; + /* BIO with filters appended */ + BIO *ndef_bio; + /* Streaming I/O boundary */ + unsigned char **boundary; +} ASN1_STREAM_ARG; + +/* Flags in ASN1_AUX */ + +/* Use a reference count */ +# define ASN1_AFLG_REFCOUNT 1 +/* Save the encoding of structure (useful for signatures) */ +# define ASN1_AFLG_ENCODING 2 +/* The Sequence length is invalid */ +# define ASN1_AFLG_BROKEN 4 +/* Use the new asn1_const_cb */ +# define ASN1_AFLG_CONST_CB 8 + +/* operation values for asn1_cb */ + +# define ASN1_OP_NEW_PRE 0 +# define ASN1_OP_NEW_POST 1 +# define ASN1_OP_FREE_PRE 2 +# define ASN1_OP_FREE_POST 3 +# define ASN1_OP_D2I_PRE 4 +# define ASN1_OP_D2I_POST 5 +# define ASN1_OP_I2D_PRE 6 +# define ASN1_OP_I2D_POST 7 +# define ASN1_OP_PRINT_PRE 8 +# define ASN1_OP_PRINT_POST 9 +# define ASN1_OP_STREAM_PRE 10 +# define ASN1_OP_STREAM_POST 11 +# define ASN1_OP_DETACHED_PRE 12 +# define ASN1_OP_DETACHED_POST 13 +# define ASN1_OP_DUP_PRE 14 +# define ASN1_OP_DUP_POST 15 +# define ASN1_OP_GET0_LIBCTX 16 +# define ASN1_OP_GET0_PROPQ 17 + +/* Macro to implement a primitive type */ +# define IMPLEMENT_ASN1_TYPE(stname) IMPLEMENT_ASN1_TYPE_ex(stname, stname, 0) +# define IMPLEMENT_ASN1_TYPE_ex(itname, vname, ex) \ + ASN1_ITEM_start(itname) \ + ASN1_ITYPE_PRIMITIVE, V_##vname, NULL, 0, NULL, ex, #itname \ + ASN1_ITEM_end(itname) + +/* Macro to implement a multi string type */ +# define IMPLEMENT_ASN1_MSTRING(itname, mask) \ + ASN1_ITEM_start(itname) \ + ASN1_ITYPE_MSTRING, mask, NULL, 0, NULL, sizeof(ASN1_STRING), #itname \ + ASN1_ITEM_end(itname) + +# define IMPLEMENT_EXTERN_ASN1(sname, tag, fptrs) \ + ASN1_ITEM_start(sname) \ + ASN1_ITYPE_EXTERN, \ + tag, \ + NULL, \ + 0, \ + &fptrs, \ + 0, \ + #sname \ + ASN1_ITEM_end(sname) + +/* Macro to implement standard functions in terms of ASN1_ITEM structures */ + +# define IMPLEMENT_ASN1_FUNCTIONS(stname) IMPLEMENT_ASN1_FUNCTIONS_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_FUNCTIONS_name(stname, itname) IMPLEMENT_ASN1_FUNCTIONS_fname(stname, itname, itname) + +# define IMPLEMENT_ASN1_FUNCTIONS_ENCODE_name(stname, itname) \ + IMPLEMENT_ASN1_FUNCTIONS_ENCODE_fname(stname, 
itname, itname) + +# define IMPLEMENT_STATIC_ASN1_ALLOC_FUNCTIONS(stname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_pfname(static, stname, stname, stname) + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS(stname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS_pfname(pre, stname, itname, fname) \ + pre stname *fname##_new(void) \ + { \ + return (stname *)ASN1_item_new(ASN1_ITEM_rptr(itname)); \ + } \ + pre void fname##_free(stname *a) \ + { \ + ASN1_item_free((ASN1_VALUE *)a, ASN1_ITEM_rptr(itname)); \ + } + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, itname, fname) \ + stname *fname##_new(void) \ + { \ + return (stname *)ASN1_item_new(ASN1_ITEM_rptr(itname)); \ + } \ + void fname##_free(stname *a) \ + { \ + ASN1_item_free((ASN1_VALUE *)a, ASN1_ITEM_rptr(itname)); \ + } + +# define IMPLEMENT_ASN1_FUNCTIONS_fname(stname, itname, fname) \ + IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, itname, fname) + +# define IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname) \ + stname *d2i_##fname(stname **a, const unsigned char **in, long len) \ + { \ + return (stname *)ASN1_item_d2i((ASN1_VALUE **)a, in, len, ASN1_ITEM_rptr(itname));\ + } \ + int i2d_##fname(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_i2d((const ASN1_VALUE *)a, out, ASN1_ITEM_rptr(itname));\ + } + +# define IMPLEMENT_ASN1_NDEF_FUNCTION(stname) \ + int i2d_##stname##_NDEF(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_ndef_i2d((const ASN1_VALUE *)a, out, ASN1_ITEM_rptr(stname));\ + } + +# define IMPLEMENT_STATIC_ASN1_ENCODE_FUNCTIONS(stname) \ + static stname *d2i_##stname(stname **a, \ + const unsigned char **in, long len) \ + { \ + return (stname *)ASN1_item_d2i((ASN1_VALUE **)a, in, len, \ + ASN1_ITEM_rptr(stname)); \ + } \ + static int i2d_##stname(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_i2d((const ASN1_VALUE *)a, out, \ + ASN1_ITEM_rptr(stname)); \ + } + +# define IMPLEMENT_ASN1_DUP_FUNCTION(stname) \ + stname * stname##_dup(const stname *x) \ + { \ + return ASN1_item_dup(ASN1_ITEM_rptr(stname), x); \ + } + +# define IMPLEMENT_ASN1_PRINT_FUNCTION(stname) \ + IMPLEMENT_ASN1_PRINT_FUNCTION_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_PRINT_FUNCTION_fname(stname, itname, fname) \ + int fname##_print_ctx(BIO *out, const stname *x, int indent, \ + const ASN1_PCTX *pctx) \ + { \ + return ASN1_item_print(out, (const ASN1_VALUE *)x, indent, \ + ASN1_ITEM_rptr(itname), pctx); \ + } + +/* external definitions for primitive types */ + +DECLARE_ASN1_ITEM(ASN1_BOOLEAN) +DECLARE_ASN1_ITEM(ASN1_TBOOLEAN) +DECLARE_ASN1_ITEM(ASN1_FBOOLEAN) +DECLARE_ASN1_ITEM(ASN1_SEQUENCE) +DECLARE_ASN1_ITEM(CBIGNUM) +DECLARE_ASN1_ITEM(BIGNUM) +DECLARE_ASN1_ITEM(INT32) +DECLARE_ASN1_ITEM(ZINT32) +DECLARE_ASN1_ITEM(UINT32) +DECLARE_ASN1_ITEM(ZUINT32) +DECLARE_ASN1_ITEM(INT64) +DECLARE_ASN1_ITEM(ZINT64) +DECLARE_ASN1_ITEM(UINT64) +DECLARE_ASN1_ITEM(ZUINT64) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* + * LONG and ZLONG are strongly discouraged for use as stored data, as the + * underlying C type (long) differs in size depending on the architecture. + * They are designed with 32-bit longs in mind. 
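+ * As an illustrative sketch, a structure holding an int64_t field would
+ * instead use the fixed-width items declared above in its template table:
+ *
+ *     ASN1_EMBED(MYTYPE, counter, INT64)
+ *
+ * (MYTYPE/counter are hypothetical; ASN1_EMBED is defined earlier in this
+ * header.)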
+ */ +DECLARE_ASN1_ITEM(LONG) +DECLARE_ASN1_ITEM(ZLONG) +# endif + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_VALUE, ASN1_VALUE, ASN1_VALUE) +#define sk_ASN1_VALUE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_value(sk, idx) ((ASN1_VALUE *)OPENSSL_sk_value(ossl_check_const_ASN1_VALUE_sk_type(sk), (idx))) +#define sk_ASN1_VALUE_new(cmp) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new(ossl_check_ASN1_VALUE_compfunc_type(cmp))) +#define sk_ASN1_VALUE_new_null() ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_VALUE_new_reserve(cmp, n) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_VALUE_compfunc_type(cmp), (n))) +#define sk_ASN1_VALUE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_VALUE_sk_type(sk), (n)) +#define sk_ASN1_VALUE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_delete(sk, i) ((ASN1_VALUE *)OPENSSL_sk_delete(ossl_check_ASN1_VALUE_sk_type(sk), (i))) +#define sk_ASN1_VALUE_delete_ptr(sk, ptr) ((ASN1_VALUE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr))) +#define sk_ASN1_VALUE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_pop(sk) ((ASN1_VALUE *)OPENSSL_sk_pop(ossl_check_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_shift(sk) ((ASN1_VALUE *)OPENSSL_sk_shift(ossl_check_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_VALUE_sk_type(sk),ossl_check_ASN1_VALUE_freefunc_type(freefunc)) +#define sk_ASN1_VALUE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr), (idx)) +#define sk_ASN1_VALUE_set(sk, idx, ptr) ((ASN1_VALUE *)OPENSSL_sk_set(ossl_check_ASN1_VALUE_sk_type(sk), (idx), ossl_check_ASN1_VALUE_type(ptr))) +#define sk_ASN1_VALUE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr), pnum) +#define sk_ASN1_VALUE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_dup(sk) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_copyfunc_type(copyfunc), ossl_check_ASN1_VALUE_freefunc_type(freefunc))) +#define sk_ASN1_VALUE_set_cmp_func(sk, cmp) ((sk_ASN1_VALUE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_compfunc_type(cmp))) + + + +/* Functions used internally by the ASN1 code */ + +int ASN1_item_ex_new(ASN1_VALUE **pval, const ASN1_ITEM *it); +void ASN1_item_ex_free(ASN1_VALUE **pval, const ASN1_ITEM *it); + +int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx); + +int ASN1_item_ex_i2d(const ASN1_VALUE 
**pval, unsigned char **out,
+                     const ASN1_ITEM *it, int tag, int aclass);
+
+/* Legacy compatibility */
+# define IMPLEMENT_ASN1_FUNCTIONS_const(name) IMPLEMENT_ASN1_FUNCTIONS(name)
+# define IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(stname, itname, fname) \
+        IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname)
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/bio.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/bio.h
new file mode 100644
index 00000000000..ea584defe5c
--- /dev/null
+++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/bio.h
@@ -0,0 +1,1010 @@
+/*
+ * WARNING: do not edit!
+ * Generated by Makefile from include/openssl/bio.h.in
+ *
+ * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+
+#ifndef OPENSSL_BIO_H
+# define OPENSSL_BIO_H
+# pragma once
+
+# include <openssl/macros.h>
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+# define HEADER_BIO_H
+# endif
+
+# include <openssl/e_os2.h>
+
+# ifndef OPENSSL_NO_STDIO
+# include <stdio.h>
+# endif
+# include <stdarg.h>
+
+# include <openssl/crypto.h>
+# include <openssl/bioerr.h>
+# include <openssl/core.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* There are the classes of BIOs */
+# define BIO_TYPE_DESCRIPTOR     0x0100 /* socket, fd, connect or accept */
+# define BIO_TYPE_FILTER         0x0200
+# define BIO_TYPE_SOURCE_SINK    0x0400
+
+/* These are the 'types' of BIOs */
+# define BIO_TYPE_NONE             0
+# define BIO_TYPE_MEM            ( 1|BIO_TYPE_SOURCE_SINK)
+# define BIO_TYPE_FILE           ( 2|BIO_TYPE_SOURCE_SINK)
+
+# define BIO_TYPE_FD             ( 4|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR)
+# define BIO_TYPE_SOCKET         ( 5|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR)
+# define BIO_TYPE_NULL           ( 6|BIO_TYPE_SOURCE_SINK)
+# define BIO_TYPE_SSL            ( 7|BIO_TYPE_FILTER)
+# define BIO_TYPE_MD             ( 8|BIO_TYPE_FILTER)
+# define BIO_TYPE_BUFFER         ( 9|BIO_TYPE_FILTER)
+# define BIO_TYPE_CIPHER         (10|BIO_TYPE_FILTER)
+# define BIO_TYPE_BASE64         (11|BIO_TYPE_FILTER)
+# define BIO_TYPE_CONNECT        (12|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR)
+# define BIO_TYPE_ACCEPT         (13|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR)
+
+# define BIO_TYPE_NBIO_TEST      (16|BIO_TYPE_FILTER)/* server proxy BIO */
+# define BIO_TYPE_NULL_FILTER    (17|BIO_TYPE_FILTER)
+# define BIO_TYPE_BIO            (19|BIO_TYPE_SOURCE_SINK)/* half a BIO pair */
+# define BIO_TYPE_LINEBUFFER     (20|BIO_TYPE_FILTER)
+# define BIO_TYPE_DGRAM          (21|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR)
+# define BIO_TYPE_ASN1           (22|BIO_TYPE_FILTER)
+# define BIO_TYPE_COMP           (23|BIO_TYPE_FILTER)
+# ifndef OPENSSL_NO_SCTP
+# define BIO_TYPE_DGRAM_SCTP     (24|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR)
+# endif
+# define BIO_TYPE_CORE_TO_PROV   (25|BIO_TYPE_SOURCE_SINK)
+# define BIO_TYPE_DGRAM_PAIR     (26|BIO_TYPE_SOURCE_SINK)
+# define BIO_TYPE_DGRAM_MEM      (27|BIO_TYPE_SOURCE_SINK)
+
+#define BIO_TYPE_START 128
+
+/*
+ * BIO_FILENAME_READ|BIO_CLOSE to open or close on free.
+ * BIO_set_fp(in,stdin,BIO_NOCLOSE);
+ */
+# define BIO_NOCLOSE             0x00
+# define BIO_CLOSE               0x01
+
+/*
+ * These are used in the following macros and are passed to BIO_ctrl()
+ */
+# define BIO_CTRL_RESET          1/* opt - rewind/zero etc */
+# define BIO_CTRL_EOF            2/* opt - are we at the eof */
+# define BIO_CTRL_INFO           3/* opt - extra tit-bits */
+# define BIO_CTRL_SET            4/* man - set the 'IO' type */
+# define BIO_CTRL_GET            5/* man - get the 'IO' type */
+# define BIO_CTRL_PUSH           6/* opt - internal, used to signify change */
+# define BIO_CTRL_POP            7/* opt - internal, used to signify change */
+# define BIO_CTRL_GET_CLOSE      8/* man - set the 'close' on free */
+# define BIO_CTRL_SET_CLOSE      9/* man - set the 'close' on free */
+# define BIO_CTRL_PENDING        10/* opt - is there more data buffered */
+# define BIO_CTRL_FLUSH          11/* opt - 'flush' buffered output */
+# define BIO_CTRL_DUP            12/* man - extra stuff for 'duped' BIO */
+# define BIO_CTRL_WPENDING       13/* opt - number of bytes still to write */
+# define BIO_CTRL_SET_CALLBACK   14/* opt - set callback function */
+# define BIO_CTRL_GET_CALLBACK   15/* opt - get callback function */
+
+# define BIO_CTRL_PEEK           29/* BIO_f_buffer special */
+# define BIO_CTRL_SET_FILENAME   30/* BIO_s_file special */
+
+/* dgram BIO stuff */
+# define BIO_CTRL_DGRAM_CONNECT       31/* BIO dgram special */
+# define BIO_CTRL_DGRAM_SET_CONNECTED 32/* allow for an externally connected
+                                         * socket to be passed in */
+# define BIO_CTRL_DGRAM_SET_RECV_TIMEOUT 33/* setsockopt, essentially */
+# define BIO_CTRL_DGRAM_GET_RECV_TIMEOUT 34/* getsockopt, essentially */
+# define BIO_CTRL_DGRAM_SET_SEND_TIMEOUT 35/* setsockopt, essentially */
+# define BIO_CTRL_DGRAM_GET_SEND_TIMEOUT 36/* getsockopt, essentially */
+
+# define BIO_CTRL_DGRAM_GET_RECV_TIMER_EXP 37/* flag whether the last */
+# define BIO_CTRL_DGRAM_GET_SEND_TIMER_EXP 38/* I/O operation timed out */
+
+/* #ifdef IP_MTU_DISCOVER */
+# define BIO_CTRL_DGRAM_MTU_DISCOVER       39/* set DF bit on egress packets */
+/* #endif */
+
+# define BIO_CTRL_DGRAM_QUERY_MTU          40/* ask kernel for current MTU */
+# define BIO_CTRL_DGRAM_GET_FALLBACK_MTU   47
+# define BIO_CTRL_DGRAM_GET_MTU            41/* get cached value for MTU */
+# define BIO_CTRL_DGRAM_SET_MTU            42/* set cached value for MTU.
+                                            * want to use this if asking
+                                            * the kernel fails */
+
+# define BIO_CTRL_DGRAM_MTU_EXCEEDED       43/* check whether the MTU was
+                                             * exceeded in the previous write
+                                             * operation */
+
+# define BIO_CTRL_DGRAM_GET_PEER           46
+# define BIO_CTRL_DGRAM_SET_PEER           44/* Destination for the data */
+
+# define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT   45/* Next DTLS handshake timeout
+                                              * to adjust socket timeouts */
+# define BIO_CTRL_DGRAM_SET_DONT_FRAG      48
+
+# define BIO_CTRL_DGRAM_GET_MTU_OVERHEAD   49
+
+/* Deliberately outside of OPENSSL_NO_SCTP - used in bss_dgram.c */
+# define BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE  50
+# ifndef OPENSSL_NO_SCTP
+/* SCTP stuff */
+# define BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY      51
+# define BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY     52
+# define BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD     53
+# define BIO_CTRL_DGRAM_SCTP_GET_SNDINFO       60
+# define BIO_CTRL_DGRAM_SCTP_SET_SNDINFO       61
+# define BIO_CTRL_DGRAM_SCTP_GET_RCVINFO       62
+# define BIO_CTRL_DGRAM_SCTP_SET_RCVINFO       63
+# define BIO_CTRL_DGRAM_SCTP_GET_PRINFO        64
+# define BIO_CTRL_DGRAM_SCTP_SET_PRINFO        65
+# define BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN     70
+# endif
+
+# define BIO_CTRL_DGRAM_SET_PEEK_MODE          71
+
+/*
+ * internal BIO:
+ * # define BIO_CTRL_SET_KTLS_SEND                 72
+ * # define BIO_CTRL_SET_KTLS_SEND_CTRL_MSG        74
+ * # define BIO_CTRL_CLEAR_KTLS_CTRL_MSG           75
+ */
+
+# define BIO_CTRL_GET_KTLS_SEND                 73
+# define BIO_CTRL_GET_KTLS_RECV                 76
+
+# define BIO_CTRL_DGRAM_SCTP_WAIT_FOR_DRY       77
+# define BIO_CTRL_DGRAM_SCTP_MSG_WAITING        78
+
+/* BIO_f_prefix controls */
+# define BIO_CTRL_SET_PREFIX                    79
+# define BIO_CTRL_SET_INDENT                    80
+# define BIO_CTRL_GET_INDENT                    81
+
+# define BIO_CTRL_DGRAM_GET_LOCAL_ADDR_CAP      82
+# define BIO_CTRL_DGRAM_GET_LOCAL_ADDR_ENABLE   83
+# define BIO_CTRL_DGRAM_SET_LOCAL_ADDR_ENABLE   84
+# define BIO_CTRL_DGRAM_GET_EFFECTIVE_CAPS      85
+# define BIO_CTRL_DGRAM_GET_CAPS                86
+# define BIO_CTRL_DGRAM_SET_CAPS                87
+# define BIO_CTRL_DGRAM_GET_NO_TRUNC            88
+# define BIO_CTRL_DGRAM_SET_NO_TRUNC            89
+
+/*
+ * internal BIO:
+ * # define BIO_CTRL_SET_KTLS_TX_ZEROCOPY_SENDFILE 90
+ */
+
+# define BIO_CTRL_GET_RPOLL_DESCRIPTOR          91
+# define BIO_CTRL_GET_WPOLL_DESCRIPTOR          92
+# define BIO_CTRL_DGRAM_DETECT_PEER_ADDR        93
+
+# define BIO_DGRAM_CAP_NONE                     0U
+# define BIO_DGRAM_CAP_HANDLES_SRC_ADDR         (1U << 0)
+# define BIO_DGRAM_CAP_HANDLES_DST_ADDR         (1U << 1)
+# define BIO_DGRAM_CAP_PROVIDES_SRC_ADDR        (1U << 2)
+# define BIO_DGRAM_CAP_PROVIDES_DST_ADDR        (1U << 3)
+
+# ifndef OPENSSL_NO_KTLS
+# define BIO_get_ktls_send(b) \
+     (BIO_ctrl(b, BIO_CTRL_GET_KTLS_SEND, 0, NULL) > 0)
+# define BIO_get_ktls_recv(b) \
+     (BIO_ctrl(b, BIO_CTRL_GET_KTLS_RECV, 0, NULL) > 0)
+# else
+# define BIO_get_ktls_send(b) (0)
+# define BIO_get_ktls_recv(b) (0)
+# endif
+
+/* modifiers */
+# define BIO_FP_READ             0x02
+# define BIO_FP_WRITE            0x04
+# define BIO_FP_APPEND           0x08
+# define BIO_FP_TEXT             0x10
+
+# define BIO_FLAGS_READ          0x01
+# define BIO_FLAGS_WRITE         0x02
+# define BIO_FLAGS_IO_SPECIAL    0x04
+# define BIO_FLAGS_RWS (BIO_FLAGS_READ|BIO_FLAGS_WRITE|BIO_FLAGS_IO_SPECIAL)
+# define BIO_FLAGS_SHOULD_RETRY  0x08
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+/* This #define was replaced by an internal constant and should not be used. */
+# define BIO_FLAGS_UPLINK        0
+# endif
+
+# define BIO_FLAGS_BASE64_NO_NL  0x100
+
+/*
+ * This is used with memory BIOs:
+ * BIO_FLAGS_MEM_RDONLY means we shouldn't free up or change the data in any way;
+ * BIO_FLAGS_NONCLEAR_RST means we shouldn't clear data on reset.
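+ * A read-only memory BIO over an existing buffer is usually created with
+ * BIO_new_mem_buf() (declared later in this file), which sets
+ * BIO_FLAGS_MEM_RDONLY on the result. Sketch only, assuming der_buf/der_len
+ * describe a caller-owned buffer that must outlive the BIO:
+ *
+ *     BIO *in = BIO_new_mem_buf(der_buf, (int)der_len);
+ *     ...read from in...
+ *     BIO_free(in);            (does not free der_buf)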
+ */
+# define BIO_FLAGS_MEM_RDONLY    0x200
+# define BIO_FLAGS_NONCLEAR_RST  0x400
+# define BIO_FLAGS_IN_EOF        0x800
+
+/* the BIO FLAGS values 0x1000 to 0x8000 are reserved for internal KTLS flags */
+
+typedef union bio_addr_st BIO_ADDR;
+typedef struct bio_addrinfo_st BIO_ADDRINFO;
+
+int BIO_get_new_index(void);
+void BIO_set_flags(BIO *b, int flags);
+int BIO_test_flags(const BIO *b, int flags);
+void BIO_clear_flags(BIO *b, int flags);
+
+# define BIO_get_flags(b) BIO_test_flags(b, ~(0x0))
+# define BIO_set_retry_special(b) \
+        BIO_set_flags(b, (BIO_FLAGS_IO_SPECIAL|BIO_FLAGS_SHOULD_RETRY))
+# define BIO_set_retry_read(b) \
+        BIO_set_flags(b, (BIO_FLAGS_READ|BIO_FLAGS_SHOULD_RETRY))
+# define BIO_set_retry_write(b) \
+        BIO_set_flags(b, (BIO_FLAGS_WRITE|BIO_FLAGS_SHOULD_RETRY))
+
+/* These are normally used internally in BIOs */
+# define BIO_clear_retry_flags(b) \
+        BIO_clear_flags(b, (BIO_FLAGS_RWS|BIO_FLAGS_SHOULD_RETRY))
+# define BIO_get_retry_flags(b) \
+        BIO_test_flags(b, (BIO_FLAGS_RWS|BIO_FLAGS_SHOULD_RETRY))
+
+/* These should be used by the application to tell why we should retry */
+# define BIO_should_read(a)      BIO_test_flags(a, BIO_FLAGS_READ)
+# define BIO_should_write(a)     BIO_test_flags(a, BIO_FLAGS_WRITE)
+# define BIO_should_io_special(a) BIO_test_flags(a, BIO_FLAGS_IO_SPECIAL)
+# define BIO_retry_type(a)       BIO_test_flags(a, BIO_FLAGS_RWS)
+# define BIO_should_retry(a)     BIO_test_flags(a, BIO_FLAGS_SHOULD_RETRY)
+
+/*
+ * The next three are used in conjunction with the BIO_should_io_special()
+ * condition. After this returns true, BIO *BIO_get_retry_BIO(BIO *bio, int
+ * *reason); will walk the BIO stack and return the 'reason' for the special
+ * and the offending BIO. Given a BIO, BIO_get_retry_reason(bio) will return
+ * the code.
+ */
+/*
+ * Returned from the SSL bio when the certificate retrieval code had an error
+ */
+# define BIO_RR_SSL_X509_LOOKUP  0x01
+/* Returned from the connect BIO when a connect would have blocked */
+# define BIO_RR_CONNECT          0x02
+/* Returned from the accept BIO when an accept would have blocked */
+# define BIO_RR_ACCEPT           0x03
+
+/* These are passed by the BIO callback */
+# define BIO_CB_FREE     0x01
+# define BIO_CB_READ     0x02
+# define BIO_CB_WRITE    0x03
+# define BIO_CB_PUTS     0x04
+# define BIO_CB_GETS     0x05
+# define BIO_CB_CTRL     0x06
+# define BIO_CB_RECVMMSG 0x07
+# define BIO_CB_SENDMMSG 0x08
+
+/*
+ * The callback is called before and after the underlying operation. The
+ * BIO_CB_RETURN flag indicates if it is after the call
+ */
+# define BIO_CB_RETURN   0x80
+# define BIO_CB_return(a) ((a)|BIO_CB_RETURN)
+# define BIO_cb_pre(a)   (!((a)&BIO_CB_RETURN))
+# define BIO_cb_post(a)  ((a)&BIO_CB_RETURN)
+
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+typedef long (*BIO_callback_fn)(BIO *b, int oper, const char *argp, int argi,
+                                long argl, long ret);
+OSSL_DEPRECATEDIN_3_0 BIO_callback_fn BIO_get_callback(const BIO *b);
+OSSL_DEPRECATEDIN_3_0 void BIO_set_callback(BIO *b, BIO_callback_fn callback);
+OSSL_DEPRECATEDIN_3_0 long BIO_debug_callback(BIO *bio, int cmd,
+                                              const char *argp, int argi,
+                                              long argl, long ret);
+# endif
+
+typedef long (*BIO_callback_fn_ex)(BIO *b, int oper, const char *argp,
+                                   size_t len, int argi,
+                                   long argl, int ret, size_t *processed);
+BIO_callback_fn_ex BIO_get_callback_ex(const BIO *b);
+void BIO_set_callback_ex(BIO *b, BIO_callback_fn_ex callback);
+long BIO_debug_callback_ex(BIO *bio, int oper, const char *argp, size_t len,
+                           int argi, long argl, int ret, size_t *processed);
+
+char *BIO_get_callback_arg(const BIO *b);
+void
BIO_set_callback_arg(BIO *b, char *arg); + +typedef struct bio_method_st BIO_METHOD; + +const char *BIO_method_name(const BIO *b); +int BIO_method_type(const BIO *b); + +typedef int BIO_info_cb(BIO *, int, int); +typedef BIO_info_cb bio_info_cb; /* backward compatibility */ + +SKM_DEFINE_STACK_OF_INTERNAL(BIO, BIO, BIO) +#define sk_BIO_num(sk) OPENSSL_sk_num(ossl_check_const_BIO_sk_type(sk)) +#define sk_BIO_value(sk, idx) ((BIO *)OPENSSL_sk_value(ossl_check_const_BIO_sk_type(sk), (idx))) +#define sk_BIO_new(cmp) ((STACK_OF(BIO) *)OPENSSL_sk_new(ossl_check_BIO_compfunc_type(cmp))) +#define sk_BIO_new_null() ((STACK_OF(BIO) *)OPENSSL_sk_new_null()) +#define sk_BIO_new_reserve(cmp, n) ((STACK_OF(BIO) *)OPENSSL_sk_new_reserve(ossl_check_BIO_compfunc_type(cmp), (n))) +#define sk_BIO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_BIO_sk_type(sk), (n)) +#define sk_BIO_free(sk) OPENSSL_sk_free(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_zero(sk) OPENSSL_sk_zero(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_delete(sk, i) ((BIO *)OPENSSL_sk_delete(ossl_check_BIO_sk_type(sk), (i))) +#define sk_BIO_delete_ptr(sk, ptr) ((BIO *)OPENSSL_sk_delete_ptr(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr))) +#define sk_BIO_push(sk, ptr) OPENSSL_sk_push(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_pop(sk) ((BIO *)OPENSSL_sk_pop(ossl_check_BIO_sk_type(sk))) +#define sk_BIO_shift(sk) ((BIO *)OPENSSL_sk_shift(ossl_check_BIO_sk_type(sk))) +#define sk_BIO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_BIO_sk_type(sk),ossl_check_BIO_freefunc_type(freefunc)) +#define sk_BIO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr), (idx)) +#define sk_BIO_set(sk, idx, ptr) ((BIO *)OPENSSL_sk_set(ossl_check_BIO_sk_type(sk), (idx), ossl_check_BIO_type(ptr))) +#define sk_BIO_find(sk, ptr) OPENSSL_sk_find(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr), pnum) +#define sk_BIO_sort(sk) OPENSSL_sk_sort(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_BIO_sk_type(sk)) +#define sk_BIO_dup(sk) ((STACK_OF(BIO) *)OPENSSL_sk_dup(ossl_check_const_BIO_sk_type(sk))) +#define sk_BIO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(BIO) *)OPENSSL_sk_deep_copy(ossl_check_const_BIO_sk_type(sk), ossl_check_BIO_copyfunc_type(copyfunc), ossl_check_BIO_freefunc_type(freefunc))) +#define sk_BIO_set_cmp_func(sk, cmp) ((sk_BIO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_BIO_sk_type(sk), ossl_check_BIO_compfunc_type(cmp))) + + + +/* Prefix and suffix callback in ASN1 BIO */ +typedef int asn1_ps_func (BIO *b, unsigned char **pbuf, int *plen, + void *parg); + +typedef void (*BIO_dgram_sctp_notification_handler_fn) (BIO *b, + void *context, + void *buf); +# ifndef OPENSSL_NO_SCTP +/* SCTP parameter structs */ +struct bio_dgram_sctp_sndinfo { + uint16_t snd_sid; + uint16_t snd_flags; + uint32_t snd_ppid; + uint32_t snd_context; +}; + +struct bio_dgram_sctp_rcvinfo { + uint16_t rcv_sid; + uint16_t rcv_ssn; + uint16_t rcv_flags; + uint32_t rcv_ppid; + uint32_t rcv_tsn; + uint32_t rcv_cumtsn; + uint32_t rcv_context; +}; + +struct bio_dgram_sctp_prinfo { + uint16_t pr_policy; + uint32_t pr_value; +}; +# endif + +/* 
BIO_sendmmsg/BIO_recvmmsg-related definitions */ +typedef struct bio_msg_st { + void *data; + size_t data_len; + BIO_ADDR *peer, *local; + uint64_t flags; +} BIO_MSG; + +typedef struct bio_mmsg_cb_args_st { + BIO_MSG *msg; + size_t stride, num_msg; + uint64_t flags; + size_t *msgs_processed; +} BIO_MMSG_CB_ARGS; + +#define BIO_POLL_DESCRIPTOR_TYPE_NONE 0 +#define BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD 1 +#define BIO_POLL_DESCRIPTOR_CUSTOM_START 8192 + +typedef struct bio_poll_descriptor_st { + uint32_t type; + union { + int fd; + void *custom; + uintptr_t custom_ui; + } value; +} BIO_POLL_DESCRIPTOR; + +/* + * #define BIO_CONN_get_param_hostname BIO_ctrl + */ + +# define BIO_C_SET_CONNECT 100 +# define BIO_C_DO_STATE_MACHINE 101 +# define BIO_C_SET_NBIO 102 +/* # define BIO_C_SET_PROXY_PARAM 103 */ +# define BIO_C_SET_FD 104 +# define BIO_C_GET_FD 105 +# define BIO_C_SET_FILE_PTR 106 +# define BIO_C_GET_FILE_PTR 107 +# define BIO_C_SET_FILENAME 108 +# define BIO_C_SET_SSL 109 +# define BIO_C_GET_SSL 110 +# define BIO_C_SET_MD 111 +# define BIO_C_GET_MD 112 +# define BIO_C_GET_CIPHER_STATUS 113 +# define BIO_C_SET_BUF_MEM 114 +# define BIO_C_GET_BUF_MEM_PTR 115 +# define BIO_C_GET_BUFF_NUM_LINES 116 +# define BIO_C_SET_BUFF_SIZE 117 +# define BIO_C_SET_ACCEPT 118 +# define BIO_C_SSL_MODE 119 +# define BIO_C_GET_MD_CTX 120 +/* # define BIO_C_GET_PROXY_PARAM 121 */ +# define BIO_C_SET_BUFF_READ_DATA 122/* data to read first */ +# define BIO_C_GET_CONNECT 123 +# define BIO_C_GET_ACCEPT 124 +# define BIO_C_SET_SSL_RENEGOTIATE_BYTES 125 +# define BIO_C_GET_SSL_NUM_RENEGOTIATES 126 +# define BIO_C_SET_SSL_RENEGOTIATE_TIMEOUT 127 +# define BIO_C_FILE_SEEK 128 +# define BIO_C_GET_CIPHER_CTX 129 +# define BIO_C_SET_BUF_MEM_EOF_RETURN 130/* return end of input + * value */ +# define BIO_C_SET_BIND_MODE 131 +# define BIO_C_GET_BIND_MODE 132 +# define BIO_C_FILE_TELL 133 +# define BIO_C_GET_SOCKS 134 +# define BIO_C_SET_SOCKS 135 + +# define BIO_C_SET_WRITE_BUF_SIZE 136/* for BIO_s_bio */ +# define BIO_C_GET_WRITE_BUF_SIZE 137 +# define BIO_C_MAKE_BIO_PAIR 138 +# define BIO_C_DESTROY_BIO_PAIR 139 +# define BIO_C_GET_WRITE_GUARANTEE 140 +# define BIO_C_GET_READ_REQUEST 141 +# define BIO_C_SHUTDOWN_WR 142 +# define BIO_C_NREAD0 143 +# define BIO_C_NREAD 144 +# define BIO_C_NWRITE0 145 +# define BIO_C_NWRITE 146 +# define BIO_C_RESET_READ_REQUEST 147 +# define BIO_C_SET_MD_CTX 148 + +# define BIO_C_SET_PREFIX 149 +# define BIO_C_GET_PREFIX 150 +# define BIO_C_SET_SUFFIX 151 +# define BIO_C_GET_SUFFIX 152 + +# define BIO_C_SET_EX_ARG 153 +# define BIO_C_GET_EX_ARG 154 + +# define BIO_C_SET_CONNECT_MODE 155 + +# define BIO_C_SET_TFO 156 /* like BIO_C_SET_NBIO */ + +# define BIO_C_SET_SOCK_TYPE 157 +# define BIO_C_GET_SOCK_TYPE 158 +# define BIO_C_GET_DGRAM_BIO 159 + +# define BIO_set_app_data(s,arg) BIO_set_ex_data(s,0,arg) +# define BIO_get_app_data(s) BIO_get_ex_data(s,0) + +# define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) +# define BIO_set_tfo(b,n) BIO_ctrl(b,BIO_C_SET_TFO,(n),NULL) + +# ifndef OPENSSL_NO_SOCK +/* IP families we support, for BIO_s_connect() and BIO_s_accept() */ +/* Note: the underlying operating system may not support some of them */ +# define BIO_FAMILY_IPV4 4 +# define BIO_FAMILY_IPV6 6 +# define BIO_FAMILY_IPANY 256 + +/* BIO_s_connect() */ +# define BIO_set_conn_hostname(b,name) BIO_ctrl(b,BIO_C_SET_CONNECT,0, \ + (char *)(name)) +# define BIO_set_conn_port(b,port) BIO_ctrl(b,BIO_C_SET_CONNECT,1, \ + (char *)(port)) +# define BIO_set_conn_address(b,addr) 
BIO_ctrl(b,BIO_C_SET_CONNECT,2, \
+                                        (char *)(addr))
+# define BIO_set_conn_ip_family(b,f)  BIO_int_ctrl(b,BIO_C_SET_CONNECT,3,f)
+# define BIO_get_conn_hostname(b)  ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,0))
+# define BIO_get_conn_port(b)      ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,1))
+# define BIO_get_conn_address(b)   ((const BIO_ADDR *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,2))
+# define BIO_get_conn_ip_family(b) BIO_ctrl(b,BIO_C_GET_CONNECT,3,NULL)
+# define BIO_get_conn_mode(b)      BIO_ctrl(b,BIO_C_GET_CONNECT,4,NULL)
+# define BIO_set_conn_mode(b,n)    BIO_ctrl(b,BIO_C_SET_CONNECT_MODE,(n),NULL)
+# define BIO_set_sock_type(b,t)    BIO_ctrl(b,BIO_C_SET_SOCK_TYPE,(t),NULL)
+# define BIO_get_sock_type(b)      BIO_ctrl(b,BIO_C_GET_SOCK_TYPE,0,NULL)
+# define BIO_get0_dgram_bio(b, p)  BIO_ctrl(b,BIO_C_GET_DGRAM_BIO,0,(void *)(BIO **)(p))
+
+/* BIO_s_accept() */
+# define BIO_set_accept_name(b,name)   BIO_ctrl(b,BIO_C_SET_ACCEPT,0, \
+                                                (char *)(name))
+# define BIO_set_accept_port(b,port)   BIO_ctrl(b,BIO_C_SET_ACCEPT,1, \
+                                                (char *)(port))
+# define BIO_get_accept_name(b)   ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,0))
+# define BIO_get_accept_port(b)   ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,1))
+# define BIO_get_peer_name(b)     ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,2))
+# define BIO_get_peer_port(b)     ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,3))
+/* #define BIO_set_nbio(b,n)    BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) */
+# define BIO_set_nbio_accept(b,n) BIO_ctrl(b,BIO_C_SET_ACCEPT,2,(n)?(void *)"a":NULL)
+# define BIO_set_accept_bios(b,bio)    BIO_ctrl(b,BIO_C_SET_ACCEPT,3, \
+                                                (char *)(bio))
+# define BIO_set_accept_ip_family(b,f) BIO_int_ctrl(b,BIO_C_SET_ACCEPT,4,f)
+# define BIO_get_accept_ip_family(b)   BIO_ctrl(b,BIO_C_GET_ACCEPT,4,NULL)
+# define BIO_set_tfo_accept(b,n)  BIO_ctrl(b,BIO_C_SET_ACCEPT,5,(n)?(void *)"a":NULL)
+
+/* Aliases kept for backward compatibility */
+# define BIO_BIND_NORMAL                 0
+# define BIO_BIND_REUSEADDR              BIO_SOCK_REUSEADDR
+# define BIO_BIND_REUSEADDR_IF_UNUSED    BIO_SOCK_REUSEADDR
+# define BIO_set_bind_mode(b,mode) BIO_ctrl(b,BIO_C_SET_BIND_MODE,mode,NULL)
+# define BIO_get_bind_mode(b)      BIO_ctrl(b,BIO_C_GET_BIND_MODE,0,NULL)
+# endif /* OPENSSL_NO_SOCK */
+
+# define BIO_do_connect(b)   BIO_do_handshake(b)
+# define BIO_do_accept(b)    BIO_do_handshake(b)
+
+# define BIO_do_handshake(b) BIO_ctrl(b,BIO_C_DO_STATE_MACHINE,0,NULL)
+
+/* BIO_s_datagram(), BIO_s_fd(), BIO_s_socket(), BIO_s_accept() and BIO_s_connect() */
+# define BIO_set_fd(b,fd,c)  BIO_int_ctrl(b,BIO_C_SET_FD,c,fd)
+# define BIO_get_fd(b,c)     BIO_ctrl(b,BIO_C_GET_FD,0,(char *)(c))
+
+/* BIO_s_file() */
+# define BIO_set_fp(b,fp,c)  BIO_ctrl(b,BIO_C_SET_FILE_PTR,c,(char *)(fp))
+# define BIO_get_fp(b,fpp)   BIO_ctrl(b,BIO_C_GET_FILE_PTR,0,(char *)(fpp))
+
+/* BIO_s_fd() and BIO_s_file() */
+# define BIO_seek(b,ofs)     (int)BIO_ctrl(b,BIO_C_FILE_SEEK,ofs,NULL)
+# define BIO_tell(b)         (int)BIO_ctrl(b,BIO_C_FILE_TELL,0,NULL)
+
+/*
+ * name is cast to lose const, but might be better to route through a
+ * function so we can do it safely
+ */
+# ifdef CONST_STRICT
+/*
+ * If you are wondering why this isn't defined, it's because CONST_STRICT is
+ * purely a compile-time kludge to allow const to be checked.
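+ * A typical use of the macro form below, as an illustrative sketch only
+ * ("cert.pem" is an arbitrary example path):
+ *
+ *     BIO *b = BIO_new(BIO_s_file());
+ *     if (b == NULL || BIO_read_filename(b, "cert.pem") <= 0)
+ *         ...handle the error...
+ *     ...read via BIO_gets()/BIO_read()...
+ *     BIO_free(b);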
+ */ +int BIO_read_filename(BIO *b, const char *name); +# else +# define BIO_read_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_READ,(char *)(name)) +# endif +# define BIO_write_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_WRITE,name) +# define BIO_append_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_APPEND,name) +# define BIO_rw_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_READ|BIO_FP_WRITE,name) + +/* + * WARNING WARNING, this ups the reference count on the read bio of the SSL + * structure. This is because the ssl read BIO is now pointed to by the + * next_bio field in the bio. So when you free the BIO, make sure you are + * doing a BIO_free_all() to catch the underlying BIO. + */ +# define BIO_set_ssl(b,ssl,c) BIO_ctrl(b,BIO_C_SET_SSL,c,(char *)(ssl)) +# define BIO_get_ssl(b,sslp) BIO_ctrl(b,BIO_C_GET_SSL,0,(char *)(sslp)) +# define BIO_set_ssl_mode(b,client) BIO_ctrl(b,BIO_C_SSL_MODE,client,NULL) +# define BIO_set_ssl_renegotiate_bytes(b,num) \ + BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_BYTES,num,NULL) +# define BIO_get_num_renegotiates(b) \ + BIO_ctrl(b,BIO_C_GET_SSL_NUM_RENEGOTIATES,0,NULL) +# define BIO_set_ssl_renegotiate_timeout(b,seconds) \ + BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_TIMEOUT,seconds,NULL) + +/* defined in evp.h */ +/* #define BIO_set_md(b,md) BIO_ctrl(b,BIO_C_SET_MD,1,(char *)(md)) */ + +# define BIO_get_mem_data(b,pp) BIO_ctrl(b,BIO_CTRL_INFO,0,(char *)(pp)) +# define BIO_set_mem_buf(b,bm,c) BIO_ctrl(b,BIO_C_SET_BUF_MEM,c,(char *)(bm)) +# define BIO_get_mem_ptr(b,pp) BIO_ctrl(b,BIO_C_GET_BUF_MEM_PTR,0, \ + (char *)(pp)) +# define BIO_set_mem_eof_return(b,v) \ + BIO_ctrl(b,BIO_C_SET_BUF_MEM_EOF_RETURN,v,NULL) + +/* For the BIO_f_buffer() type */ +# define BIO_get_buffer_num_lines(b) BIO_ctrl(b,BIO_C_GET_BUFF_NUM_LINES,0,NULL) +# define BIO_set_buffer_size(b,size) BIO_ctrl(b,BIO_C_SET_BUFF_SIZE,size,NULL) +# define BIO_set_read_buffer_size(b,size) BIO_int_ctrl(b,BIO_C_SET_BUFF_SIZE,size,0) +# define BIO_set_write_buffer_size(b,size) BIO_int_ctrl(b,BIO_C_SET_BUFF_SIZE,size,1) +# define BIO_set_buffer_read_data(b,buf,num) BIO_ctrl(b,BIO_C_SET_BUFF_READ_DATA,num,buf) + +/* Don't use the next one unless you know what you are doing :-) */ +# define BIO_dup_state(b,ret) BIO_ctrl(b,BIO_CTRL_DUP,0,(char *)(ret)) + +# define BIO_reset(b) (int)BIO_ctrl(b,BIO_CTRL_RESET,0,NULL) +# define BIO_eof(b) (int)BIO_ctrl(b,BIO_CTRL_EOF,0,NULL) +# define BIO_set_close(b,c) (int)BIO_ctrl(b,BIO_CTRL_SET_CLOSE,(c),NULL) +# define BIO_get_close(b) (int)BIO_ctrl(b,BIO_CTRL_GET_CLOSE,0,NULL) +# define BIO_pending(b) (int)BIO_ctrl(b,BIO_CTRL_PENDING,0,NULL) +# define BIO_wpending(b) (int)BIO_ctrl(b,BIO_CTRL_WPENDING,0,NULL) +/* ...pending macros have inappropriate return type */ +size_t BIO_ctrl_pending(BIO *b); +size_t BIO_ctrl_wpending(BIO *b); +# define BIO_flush(b) (int)BIO_ctrl(b,BIO_CTRL_FLUSH,0,NULL) +# define BIO_get_info_callback(b,cbp) (int)BIO_ctrl(b,BIO_CTRL_GET_CALLBACK,0, \ + cbp) +# define BIO_set_info_callback(b,cb) (int)BIO_callback_ctrl(b,BIO_CTRL_SET_CALLBACK,cb) + +/* For the BIO_f_buffer() type */ +# define BIO_buffer_get_num_lines(b) BIO_ctrl(b,BIO_CTRL_GET,0,NULL) +# define BIO_buffer_peek(b,s,l) BIO_ctrl(b,BIO_CTRL_PEEK,(l),(s)) + +/* For BIO_s_bio() */ +# define BIO_set_write_buf_size(b,size) (int)BIO_ctrl(b,BIO_C_SET_WRITE_BUF_SIZE,size,NULL) +# define BIO_get_write_buf_size(b,size) (size_t)BIO_ctrl(b,BIO_C_GET_WRITE_BUF_SIZE,size,NULL) +# define 
BIO_make_bio_pair(b1,b2) (int)BIO_ctrl(b1,BIO_C_MAKE_BIO_PAIR,0,b2) +# define BIO_destroy_bio_pair(b) (int)BIO_ctrl(b,BIO_C_DESTROY_BIO_PAIR,0,NULL) +# define BIO_shutdown_wr(b) (int)BIO_ctrl(b, BIO_C_SHUTDOWN_WR, 0, NULL) +/* macros with inappropriate type -- but ...pending macros use int too: */ +# define BIO_get_write_guarantee(b) (int)BIO_ctrl(b,BIO_C_GET_WRITE_GUARANTEE,0,NULL) +# define BIO_get_read_request(b) (int)BIO_ctrl(b,BIO_C_GET_READ_REQUEST,0,NULL) +size_t BIO_ctrl_get_write_guarantee(BIO *b); +size_t BIO_ctrl_get_read_request(BIO *b); +int BIO_ctrl_reset_read_request(BIO *b); + +/* ctrl macros for dgram */ +# define BIO_ctrl_dgram_connect(b,peer) \ + (int)BIO_ctrl(b,BIO_CTRL_DGRAM_CONNECT,0, (char *)(peer)) +# define BIO_ctrl_set_connected(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_SET_CONNECTED, 0, (char *)(peer)) +# define BIO_dgram_recv_timedout(b) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_RECV_TIMER_EXP, 0, NULL) +# define BIO_dgram_send_timedout(b) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_SEND_TIMER_EXP, 0, NULL) +# define BIO_dgram_get_peer(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_PEER, 0, (char *)(peer)) +# define BIO_dgram_set_peer(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_SET_PEER, 0, (char *)(peer)) +# define BIO_dgram_detect_peer_addr(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_DETECT_PEER_ADDR, 0, (char *)(peer)) +# define BIO_dgram_get_mtu_overhead(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU_OVERHEAD, 0, NULL) +# define BIO_dgram_get_local_addr_cap(b) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_LOCAL_ADDR_CAP, 0, NULL) +# define BIO_dgram_get_local_addr_enable(b, penable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_LOCAL_ADDR_ENABLE, 0, (char *)(penable)) +# define BIO_dgram_set_local_addr_enable(b, enable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_LOCAL_ADDR_ENABLE, (enable), NULL) +# define BIO_dgram_get_effective_caps(b) \ + (uint32_t)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_EFFECTIVE_CAPS, 0, NULL) +# define BIO_dgram_get_caps(b) \ + (uint32_t)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_CAPS, 0, NULL) +# define BIO_dgram_set_caps(b, caps) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_CAPS, (long)(caps), NULL) +# define BIO_dgram_get_no_trunc(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_NO_TRUNC, 0, NULL) +# define BIO_dgram_set_no_trunc(b, enable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_NO_TRUNC, (enable), NULL) +# define BIO_dgram_get_mtu(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU, 0, NULL) +# define BIO_dgram_set_mtu(b, mtu) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_MTU, (mtu), NULL) + +/* ctrl macros for BIO_f_prefix */ +# define BIO_set_prefix(b,p) BIO_ctrl((b), BIO_CTRL_SET_PREFIX, 0, (void *)(p)) +# define BIO_set_indent(b,i) BIO_ctrl((b), BIO_CTRL_SET_INDENT, (i), NULL) +# define BIO_get_indent(b) BIO_ctrl((b), BIO_CTRL_GET_INDENT, 0, NULL) + +#define BIO_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_BIO, l, p, newf, dupf, freef) +int BIO_set_ex_data(BIO *bio, int idx, void *data); +void *BIO_get_ex_data(const BIO *bio, int idx); +uint64_t BIO_number_read(BIO *bio); +uint64_t BIO_number_written(BIO *bio); + +/* For BIO_f_asn1() */ +int BIO_asn1_set_prefix(BIO *b, asn1_ps_func *prefix, + asn1_ps_func *prefix_free); +int BIO_asn1_get_prefix(BIO *b, asn1_ps_func **pprefix, + asn1_ps_func **pprefix_free); +int BIO_asn1_set_suffix(BIO *b, asn1_ps_func *suffix, + asn1_ps_func *suffix_free); +int BIO_asn1_get_suffix(BIO *b, asn1_ps_func **psuffix, + asn1_ps_func **psuffix_free); + +const BIO_METHOD 
*BIO_s_file(void); +BIO *BIO_new_file(const char *filename, const char *mode); +BIO *BIO_new_from_core_bio(OSSL_LIB_CTX *libctx, OSSL_CORE_BIO *corebio); +# ifndef OPENSSL_NO_STDIO +BIO *BIO_new_fp(FILE *stream, int close_flag); +# endif +BIO *BIO_new_ex(OSSL_LIB_CTX *libctx, const BIO_METHOD *method); +BIO *BIO_new(const BIO_METHOD *type); +int BIO_free(BIO *a); +void BIO_set_data(BIO *a, void *ptr); +void *BIO_get_data(BIO *a); +void BIO_set_init(BIO *a, int init); +int BIO_get_init(BIO *a); +void BIO_set_shutdown(BIO *a, int shut); +int BIO_get_shutdown(BIO *a); +void BIO_vfree(BIO *a); +int BIO_up_ref(BIO *a); +int BIO_read(BIO *b, void *data, int dlen); +int BIO_read_ex(BIO *b, void *data, size_t dlen, size_t *readbytes); +__owur int BIO_recvmmsg(BIO *b, BIO_MSG *msg, + size_t stride, size_t num_msg, uint64_t flags, + size_t *msgs_processed); +int BIO_gets(BIO *bp, char *buf, int size); +int BIO_get_line(BIO *bio, char *buf, int size); +int BIO_write(BIO *b, const void *data, int dlen); +int BIO_write_ex(BIO *b, const void *data, size_t dlen, size_t *written); +__owur int BIO_sendmmsg(BIO *b, BIO_MSG *msg, + size_t stride, size_t num_msg, uint64_t flags, + size_t *msgs_processed); +__owur int BIO_get_rpoll_descriptor(BIO *b, BIO_POLL_DESCRIPTOR *desc); +__owur int BIO_get_wpoll_descriptor(BIO *b, BIO_POLL_DESCRIPTOR *desc); +int BIO_puts(BIO *bp, const char *buf); +int BIO_indent(BIO *b, int indent, int max); +long BIO_ctrl(BIO *bp, int cmd, long larg, void *parg); +long BIO_callback_ctrl(BIO *b, int cmd, BIO_info_cb *fp); +void *BIO_ptr_ctrl(BIO *bp, int cmd, long larg); +long BIO_int_ctrl(BIO *bp, int cmd, long larg, int iarg); +BIO *BIO_push(BIO *b, BIO *append); +BIO *BIO_pop(BIO *b); +void BIO_free_all(BIO *a); +BIO *BIO_find_type(BIO *b, int bio_type); +BIO *BIO_next(BIO *b); +void BIO_set_next(BIO *b, BIO *next); +BIO *BIO_get_retry_BIO(BIO *bio, int *reason); +int BIO_get_retry_reason(BIO *bio); +void BIO_set_retry_reason(BIO *bio, int reason); +BIO *BIO_dup_chain(BIO *in); + +int BIO_nread0(BIO *bio, char **buf); +int BIO_nread(BIO *bio, char **buf, int num); +int BIO_nwrite0(BIO *bio, char **buf); +int BIO_nwrite(BIO *bio, char **buf, int num); + +const BIO_METHOD *BIO_s_mem(void); +# ifndef OPENSSL_NO_DGRAM +const BIO_METHOD *BIO_s_dgram_mem(void); +# endif +const BIO_METHOD *BIO_s_secmem(void); +BIO *BIO_new_mem_buf(const void *buf, int len); +# ifndef OPENSSL_NO_SOCK +const BIO_METHOD *BIO_s_socket(void); +const BIO_METHOD *BIO_s_connect(void); +const BIO_METHOD *BIO_s_accept(void); +# endif +const BIO_METHOD *BIO_s_fd(void); +const BIO_METHOD *BIO_s_log(void); +const BIO_METHOD *BIO_s_bio(void); +const BIO_METHOD *BIO_s_null(void); +const BIO_METHOD *BIO_f_null(void); +const BIO_METHOD *BIO_f_buffer(void); +const BIO_METHOD *BIO_f_readbuffer(void); +const BIO_METHOD *BIO_f_linebuffer(void); +const BIO_METHOD *BIO_f_nbio_test(void); +const BIO_METHOD *BIO_f_prefix(void); +const BIO_METHOD *BIO_s_core(void); +# ifndef OPENSSL_NO_DGRAM +const BIO_METHOD *BIO_s_dgram_pair(void); +const BIO_METHOD *BIO_s_datagram(void); +int BIO_dgram_non_fatal_error(int error); +BIO *BIO_new_dgram(int fd, int close_flag); +# ifndef OPENSSL_NO_SCTP +const BIO_METHOD *BIO_s_datagram_sctp(void); +BIO *BIO_new_dgram_sctp(int fd, int close_flag); +int BIO_dgram_is_sctp(BIO *bio); +int BIO_dgram_sctp_notification_cb(BIO *b, + BIO_dgram_sctp_notification_handler_fn handle_notifications, + void *context); +int BIO_dgram_sctp_wait_for_dry(BIO *b); +int BIO_dgram_sctp_msg_waiting(BIO *b); +# endif +# 
endif + +# ifndef OPENSSL_NO_SOCK +int BIO_sock_should_retry(int i); +int BIO_sock_non_fatal_error(int error); +int BIO_err_is_non_fatal(unsigned int errcode); +int BIO_socket_wait(int fd, int for_read, time_t max_time); +# endif +int BIO_wait(BIO *bio, time_t max_time, unsigned int nap_milliseconds); +int BIO_do_connect_retry(BIO *bio, int timeout, int nap_milliseconds); + +int BIO_fd_should_retry(int i); +int BIO_fd_non_fatal_error(int error); +int BIO_dump_cb(int (*cb) (const void *data, size_t len, void *u), + void *u, const void *s, int len); +int BIO_dump_indent_cb(int (*cb) (const void *data, size_t len, void *u), + void *u, const void *s, int len, int indent); +int BIO_dump(BIO *b, const void *bytes, int len); +int BIO_dump_indent(BIO *b, const void *bytes, int len, int indent); +# ifndef OPENSSL_NO_STDIO +int BIO_dump_fp(FILE *fp, const void *s, int len); +int BIO_dump_indent_fp(FILE *fp, const void *s, int len, int indent); +# endif +int BIO_hex_string(BIO *out, int indent, int width, const void *data, + int datalen); + +# ifndef OPENSSL_NO_SOCK +BIO_ADDR *BIO_ADDR_new(void); +int BIO_ADDR_copy(BIO_ADDR *dst, const BIO_ADDR *src); +BIO_ADDR *BIO_ADDR_dup(const BIO_ADDR *ap); +int BIO_ADDR_rawmake(BIO_ADDR *ap, int family, + const void *where, size_t wherelen, unsigned short port); +void BIO_ADDR_free(BIO_ADDR *); +void BIO_ADDR_clear(BIO_ADDR *ap); +int BIO_ADDR_family(const BIO_ADDR *ap); +int BIO_ADDR_rawaddress(const BIO_ADDR *ap, void *p, size_t *l); +unsigned short BIO_ADDR_rawport(const BIO_ADDR *ap); +char *BIO_ADDR_hostname_string(const BIO_ADDR *ap, int numeric); +char *BIO_ADDR_service_string(const BIO_ADDR *ap, int numeric); +char *BIO_ADDR_path_string(const BIO_ADDR *ap); + +const BIO_ADDRINFO *BIO_ADDRINFO_next(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_family(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_socktype(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_protocol(const BIO_ADDRINFO *bai); +const BIO_ADDR *BIO_ADDRINFO_address(const BIO_ADDRINFO *bai); +void BIO_ADDRINFO_free(BIO_ADDRINFO *bai); + +enum BIO_hostserv_priorities { + BIO_PARSE_PRIO_HOST, BIO_PARSE_PRIO_SERV +}; +int BIO_parse_hostserv(const char *hostserv, char **host, char **service, + enum BIO_hostserv_priorities hostserv_prio); +enum BIO_lookup_type { + BIO_LOOKUP_CLIENT, BIO_LOOKUP_SERVER +}; +int BIO_lookup(const char *host, const char *service, + enum BIO_lookup_type lookup_type, + int family, int socktype, BIO_ADDRINFO **res); +int BIO_lookup_ex(const char *host, const char *service, + int lookup_type, int family, int socktype, int protocol, + BIO_ADDRINFO **res); +int BIO_sock_error(int sock); +int BIO_socket_ioctl(int fd, long type, void *arg); +int BIO_socket_nbio(int fd, int mode); +int BIO_sock_init(void); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define BIO_sock_cleanup() while(0) continue +# endif +int BIO_set_tcp_ndelay(int sock, int turn_on); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 struct hostent *BIO_gethostbyname(const char *name); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_port(const char *str, unsigned short *port_ptr); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_host_ip(const char *str, unsigned char *ip); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_accept_socket(char *host_port, int mode); +OSSL_DEPRECATEDIN_1_1_0 int BIO_accept(int sock, char **ip_port); +# endif + +union BIO_sock_info_u { + BIO_ADDR *addr; +}; +enum BIO_sock_info_type { + BIO_SOCK_INFO_ADDRESS +}; +int BIO_sock_info(int sock, + enum BIO_sock_info_type type, union BIO_sock_info_u *info); + +# define 
BIO_SOCK_REUSEADDR 0x01 +# define BIO_SOCK_V6_ONLY 0x02 +# define BIO_SOCK_KEEPALIVE 0x04 +# define BIO_SOCK_NONBLOCK 0x08 +# define BIO_SOCK_NODELAY 0x10 +# define BIO_SOCK_TFO 0x20 + +int BIO_socket(int domain, int socktype, int protocol, int options); +int BIO_connect(int sock, const BIO_ADDR *addr, int options); +int BIO_bind(int sock, const BIO_ADDR *addr, int options); +int BIO_listen(int sock, const BIO_ADDR *addr, int options); +int BIO_accept_ex(int accept_sock, BIO_ADDR *addr, int options); +int BIO_closesocket(int sock); + +BIO *BIO_new_socket(int sock, int close_flag); +BIO *BIO_new_connect(const char *host_port); +BIO *BIO_new_accept(const char *host_port); +# endif /* OPENSSL_NO_SOCK*/ + +BIO *BIO_new_fd(int fd, int close_flag); + +int BIO_new_bio_pair(BIO **bio1, size_t writebuf1, + BIO **bio2, size_t writebuf2); +# ifndef OPENSSL_NO_DGRAM +int BIO_new_bio_dgram_pair(BIO **bio1, size_t writebuf1, + BIO **bio2, size_t writebuf2); +# endif + +/* + * If successful, returns 1 and in *bio1, *bio2 two BIO pair endpoints. + * Otherwise returns 0 and sets *bio1 and *bio2 to NULL. Size 0 uses default + * value. + */ + +void BIO_copy_next_retry(BIO *b); + +/* + * long BIO_ghbn_ctrl(int cmd,int iarg,char *parg); + */ + +# define ossl_bio__attr__(x) +# if defined(__GNUC__) && defined(__STDC_VERSION__) \ + && !defined(__MINGW32__) && !defined(__MINGW64__) \ + && !defined(__APPLE__) + /* + * Because we support the 'z' modifier, which made its appearance in C99, + * we can't use __attribute__ with pre C99 dialects. + */ +# if __STDC_VERSION__ >= 199901L +# undef ossl_bio__attr__ +# define ossl_bio__attr__ __attribute__ +# if __GNUC__*10 + __GNUC_MINOR__ >= 44 +# define ossl_bio__printf__ __gnu_printf__ +# else +# define ossl_bio__printf__ __printf__ +# endif +# endif +# endif +int BIO_printf(BIO *bio, const char *format, ...) +ossl_bio__attr__((__format__(ossl_bio__printf__, 2, 3))); +int BIO_vprintf(BIO *bio, const char *format, va_list args) +ossl_bio__attr__((__format__(ossl_bio__printf__, 2, 0))); +int BIO_snprintf(char *buf, size_t n, const char *format, ...) 
+ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 4))); +int BIO_vsnprintf(char *buf, size_t n, const char *format, va_list args) +ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 0))); +# undef ossl_bio__attr__ +# undef ossl_bio__printf__ + + +BIO_METHOD *BIO_meth_new(int type, const char *name); +void BIO_meth_free(BIO_METHOD *biom); +int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *, int); +int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *, size_t, + size_t *); +int BIO_meth_set_write(BIO_METHOD *biom, + int (*write) (BIO *, const char *, int)); +int BIO_meth_set_write_ex(BIO_METHOD *biom, + int (*bwrite) (BIO *, const char *, size_t, size_t *)); +int BIO_meth_set_sendmmsg(BIO_METHOD *biom, + int (*f) (BIO *, BIO_MSG *, size_t, size_t, + uint64_t, size_t *)); +int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, + size_t, size_t, + uint64_t, size_t *); +int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int); +int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *, size_t, size_t *); +int BIO_meth_set_read(BIO_METHOD *biom, + int (*read) (BIO *, char *, int)); +int BIO_meth_set_read_ex(BIO_METHOD *biom, + int (*bread) (BIO *, char *, size_t, size_t *)); +int BIO_meth_set_recvmmsg(BIO_METHOD *biom, + int (*f) (BIO *, BIO_MSG *, size_t, size_t, + uint64_t, size_t *)); +int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, + size_t, size_t, + uint64_t, size_t *); +int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *); +int BIO_meth_set_puts(BIO_METHOD *biom, + int (*puts) (BIO *, const char *)); +int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int); +int BIO_meth_set_gets(BIO_METHOD *biom, + int (*ossl_gets) (BIO *, char *, int)); +long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int, long, void *); +int BIO_meth_set_ctrl(BIO_METHOD *biom, + long (*ctrl) (BIO *, int, long, void *)); +int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *); +int BIO_meth_set_create(BIO_METHOD *biom, int (*create) (BIO *)); +int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *); +int BIO_meth_set_destroy(BIO_METHOD *biom, int (*destroy) (BIO *)); +long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom)) + (BIO *, int, BIO_info_cb *); +int BIO_meth_set_callback_ctrl(BIO_METHOD *biom, + long (*callback_ctrl) (BIO *, int, + BIO_info_cb *)); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h new file mode 100644 index 00000000000..60beffd57ef --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h @@ -0,0 +1,629 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/cmp.h.in + * + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright Nokia 2007-2019 + * Copyright Siemens AG 2015-2019 + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CMP_H +# define OPENSSL_CMP_H + +# include +# ifndef OPENSSL_NO_CMP + +# include +# include +# include +# include + +/* explicit #includes not strictly needed since implied by the above: */ +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# define OSSL_CMP_PVNO_2 2 +# define OSSL_CMP_PVNO_3 3 +# define OSSL_CMP_PVNO OSSL_CMP_PVNO_2 /* v2 is the default */ + +/*- + * PKIFailureInfo ::= BIT STRING { + * -- since we can fail in more than one way! + * -- More codes may be added in the future if/when required. + * badAlg (0), + * -- unrecognized or unsupported Algorithm Identifier + * badMessageCheck (1), + * -- integrity check failed (e.g., signature did not verify) + * badRequest (2), + * -- transaction not permitted or supported + * badTime (3), + * -- messageTime was not sufficiently close to the system time, + * -- as defined by local policy + * badCertId (4), + * -- no certificate could be found matching the provided criteria + * badDataFormat (5), + * -- the data submitted has the wrong format + * wrongAuthority (6), + * -- the authority indicated in the request is different from the + * -- one creating the response token + * incorrectData (7), + * -- the requester's data is incorrect (for notary services) + * missingTimeStamp (8), + * -- when the timestamp is missing but should be there + * -- (by policy) + * badPOP (9), + * -- the proof-of-possession failed + * certRevoked (10), + * -- the certificate has already been revoked + * certConfirmed (11), + * -- the certificate has already been confirmed + * wrongIntegrity (12), + * -- invalid integrity, password based instead of signature or + * -- vice versa + * badRecipientNonce (13), + * -- invalid recipient nonce, either missing or wrong value + * timeNotAvailable (14), + * -- the TSA's time source is not available + * unacceptedPolicy (15), + * -- the requested TSA policy is not supported by the TSA. + * unacceptedExtension (16), + * -- the requested extension is not supported by the TSA. + * addInfoNotAvailable (17), + * -- the additional information requested could not be + * -- understood or is not available + * badSenderNonce (18), + * -- invalid sender nonce, either missing or wrong size + * badCertTemplate (19), + * -- invalid cert. 
template or missing mandatory information + * signerNotTrusted (20), + * -- signer of the message unknown or not trusted + * transactionIdInUse (21), + * -- the transaction identifier is already in use + * unsupportedVersion (22), + * -- the version of the message is not supported + * notAuthorized (23), + * -- the sender was not authorized to make the preceding + * -- request or perform the preceding action + * systemUnavail (24), + * -- the request cannot be handled due to system unavailability + * systemFailure (25), + * -- the request cannot be handled due to system failure + * duplicateCertReq (26) + * -- certificate cannot be issued because a duplicate + * -- certificate already exists + * } + */ +# define OSSL_CMP_PKIFAILUREINFO_badAlg 0 +# define OSSL_CMP_PKIFAILUREINFO_badMessageCheck 1 +# define OSSL_CMP_PKIFAILUREINFO_badRequest 2 +# define OSSL_CMP_PKIFAILUREINFO_badTime 3 +# define OSSL_CMP_PKIFAILUREINFO_badCertId 4 +# define OSSL_CMP_PKIFAILUREINFO_badDataFormat 5 +# define OSSL_CMP_PKIFAILUREINFO_wrongAuthority 6 +# define OSSL_CMP_PKIFAILUREINFO_incorrectData 7 +# define OSSL_CMP_PKIFAILUREINFO_missingTimeStamp 8 +# define OSSL_CMP_PKIFAILUREINFO_badPOP 9 +# define OSSL_CMP_PKIFAILUREINFO_certRevoked 10 +# define OSSL_CMP_PKIFAILUREINFO_certConfirmed 11 +# define OSSL_CMP_PKIFAILUREINFO_wrongIntegrity 12 +# define OSSL_CMP_PKIFAILUREINFO_badRecipientNonce 13 +# define OSSL_CMP_PKIFAILUREINFO_timeNotAvailable 14 +# define OSSL_CMP_PKIFAILUREINFO_unacceptedPolicy 15 +# define OSSL_CMP_PKIFAILUREINFO_unacceptedExtension 16 +# define OSSL_CMP_PKIFAILUREINFO_addInfoNotAvailable 17 +# define OSSL_CMP_PKIFAILUREINFO_badSenderNonce 18 +# define OSSL_CMP_PKIFAILUREINFO_badCertTemplate 19 +# define OSSL_CMP_PKIFAILUREINFO_signerNotTrusted 20 +# define OSSL_CMP_PKIFAILUREINFO_transactionIdInUse 21 +# define OSSL_CMP_PKIFAILUREINFO_unsupportedVersion 22 +# define OSSL_CMP_PKIFAILUREINFO_notAuthorized 23 +# define OSSL_CMP_PKIFAILUREINFO_systemUnavail 24 +# define OSSL_CMP_PKIFAILUREINFO_systemFailure 25 +# define OSSL_CMP_PKIFAILUREINFO_duplicateCertReq 26 +# define OSSL_CMP_PKIFAILUREINFO_MAX 26 +# define OSSL_CMP_PKIFAILUREINFO_MAX_BIT_PATTERN \ + ((1 << (OSSL_CMP_PKIFAILUREINFO_MAX + 1)) - 1) +# if OSSL_CMP_PKIFAILUREINFO_MAX_BIT_PATTERN > INT_MAX +# error CMP_PKIFAILUREINFO_MAX bit pattern does not fit in type int +# endif +typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; + +# define OSSL_CMP_CTX_FAILINFO_badAlg (1 << 0) +# define OSSL_CMP_CTX_FAILINFO_badMessageCheck (1 << 1) +# define OSSL_CMP_CTX_FAILINFO_badRequest (1 << 2) +# define OSSL_CMP_CTX_FAILINFO_badTime (1 << 3) +# define OSSL_CMP_CTX_FAILINFO_badCertId (1 << 4) +# define OSSL_CMP_CTX_FAILINFO_badDataFormat (1 << 5) +# define OSSL_CMP_CTX_FAILINFO_wrongAuthority (1 << 6) +# define OSSL_CMP_CTX_FAILINFO_incorrectData (1 << 7) +# define OSSL_CMP_CTX_FAILINFO_missingTimeStamp (1 << 8) +# define OSSL_CMP_CTX_FAILINFO_badPOP (1 << 9) +# define OSSL_CMP_CTX_FAILINFO_certRevoked (1 << 10) +# define OSSL_CMP_CTX_FAILINFO_certConfirmed (1 << 11) +# define OSSL_CMP_CTX_FAILINFO_wrongIntegrity (1 << 12) +# define OSSL_CMP_CTX_FAILINFO_badRecipientNonce (1 << 13) +# define OSSL_CMP_CTX_FAILINFO_timeNotAvailable (1 << 14) +# define OSSL_CMP_CTX_FAILINFO_unacceptedPolicy (1 << 15) +# define OSSL_CMP_CTX_FAILINFO_unacceptedExtension (1 << 16) +# define OSSL_CMP_CTX_FAILINFO_addInfoNotAvailable (1 << 17) +# define OSSL_CMP_CTX_FAILINFO_badSenderNonce (1 << 18) +# define OSSL_CMP_CTX_FAILINFO_badCertTemplate (1 << 19) +# define 
OSSL_CMP_CTX_FAILINFO_signerNotTrusted (1 << 20) +# define OSSL_CMP_CTX_FAILINFO_transactionIdInUse (1 << 21) +# define OSSL_CMP_CTX_FAILINFO_unsupportedVersion (1 << 22) +# define OSSL_CMP_CTX_FAILINFO_notAuthorized (1 << 23) +# define OSSL_CMP_CTX_FAILINFO_systemUnavail (1 << 24) +# define OSSL_CMP_CTX_FAILINFO_systemFailure (1 << 25) +# define OSSL_CMP_CTX_FAILINFO_duplicateCertReq (1 << 26) + +/*- + * PKIStatus ::= INTEGER { + * accepted (0), + * -- you got exactly what you asked for + * grantedWithMods (1), + * -- you got something like what you asked for; the + * -- requester is responsible for ascertaining the differences + * rejection (2), + * -- you don't get it, more information elsewhere in the message + * waiting (3), + * -- the request body part has not yet been processed; expect to + * -- hear more later (note: proper handling of this status + * -- response MAY use the polling req/rep PKIMessages specified + * -- in Section 5.3.22; alternatively, polling in the underlying + * -- transport layer MAY have some utility in this regard) + * revocationWarning (4), + * -- this message contains a warning that a revocation is + * -- imminent + * revocationNotification (5), + * -- notification that a revocation has occurred + * keyUpdateWarning (6) + * -- update already done for the oldCertId specified in + * -- CertReqMsg + * } + */ +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_revocationNotification 5 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; + +DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) + +# define OSSL_CMP_CERTORENCCERT_CERTIFICATE 0 +# define OSSL_CMP_CERTORENCCERT_ENCRYPTEDCERT 1 + +/* data type declarations */ +typedef struct ossl_cmp_ctx_st OSSL_CMP_CTX; +typedef struct ossl_cmp_pkiheader_st OSSL_CMP_PKIHEADER; +DECLARE_ASN1_FUNCTIONS(OSSL_CMP_PKIHEADER) +typedef struct ossl_cmp_msg_st OSSL_CMP_MSG; +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_MSG) +DECLARE_ASN1_ENCODE_FUNCTIONS(OSSL_CMP_MSG, OSSL_CMP_MSG, OSSL_CMP_MSG) +typedef struct ossl_cmp_certstatus_st OSSL_CMP_CERTSTATUS; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTSTATUS, OSSL_CMP_CERTSTATUS, OSSL_CMP_CERTSTATUS) +#define sk_OSSL_CMP_CERTSTATUS_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_value(sk, idx) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTSTATUS_new(cmp) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTSTATUS_new_null() ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTSTATUS_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTSTATUS_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTSTATUS_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_delete(sk, i) ((OSSL_CMP_CERTSTATUS 
*)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTSTATUS_delete_ptr(sk, ptr) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr))) +#define sk_OSSL_CMP_CERTSTATUS_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_pop(sk) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_shift(sk) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk),ossl_check_OSSL_CMP_CERTSTATUS_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTSTATUS_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTSTATUS_set(sk, idx, ptr) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr))) +#define sk_OSSL_CMP_CERTSTATUS_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr), pnum) +#define sk_OSSL_CMP_CERTSTATUS_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_dup(sk) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_CERTSTATUS_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTSTATUS_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTSTATUS_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp))) + +typedef struct ossl_cmp_itav_st OSSL_CMP_ITAV; +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_ITAV) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_ITAV, OSSL_CMP_ITAV, OSSL_CMP_ITAV) +#define sk_OSSL_CMP_ITAV_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_value(sk, idx) ((OSSL_CMP_ITAV *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk), (idx))) +#define sk_OSSL_CMP_ITAV_new(cmp) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp))) +#define sk_OSSL_CMP_ITAV_new_null() ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_ITAV_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_ITAV_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (n)) +#define sk_OSSL_CMP_ITAV_free(sk) 
OPENSSL_sk_free(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_delete(sk, i) ((OSSL_CMP_ITAV *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (i))) +#define sk_OSSL_CMP_ITAV_delete_ptr(sk, ptr) ((OSSL_CMP_ITAV *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr))) +#define sk_OSSL_CMP_ITAV_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_pop(sk) ((OSSL_CMP_ITAV *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_shift(sk) ((OSSL_CMP_ITAV *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_ITAV_sk_type(sk),ossl_check_OSSL_CMP_ITAV_freefunc_type(freefunc)) +#define sk_OSSL_CMP_ITAV_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr), (idx)) +#define sk_OSSL_CMP_ITAV_set(sk, idx, ptr) ((OSSL_CMP_ITAV *)OPENSSL_sk_set(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (idx), ossl_check_OSSL_CMP_ITAV_type(ptr))) +#define sk_OSSL_CMP_ITAV_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr), pnum) +#define sk_OSSL_CMP_ITAV_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_dup(sk) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_ITAV_freefunc_type(freefunc))) +#define sk_OSSL_CMP_ITAV_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_ITAV_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp))) + +typedef struct ossl_cmp_revrepcontent_st OSSL_CMP_REVREPCONTENT; +typedef struct ossl_cmp_pkisi_st OSSL_CMP_PKISI; +DECLARE_ASN1_FUNCTIONS(OSSL_CMP_PKISI) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_PKISI) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_PKISI, OSSL_CMP_PKISI, OSSL_CMP_PKISI) +#define sk_OSSL_CMP_PKISI_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_value(sk, idx) ((OSSL_CMP_PKISI *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk), (idx))) +#define sk_OSSL_CMP_PKISI_new(cmp) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp))) +#define sk_OSSL_CMP_PKISI_new_null() ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_PKISI_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_PKISI_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (n)) +#define sk_OSSL_CMP_PKISI_free(sk) 
OPENSSL_sk_free(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_delete(sk, i) ((OSSL_CMP_PKISI *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (i))) +#define sk_OSSL_CMP_PKISI_delete_ptr(sk, ptr) ((OSSL_CMP_PKISI *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr))) +#define sk_OSSL_CMP_PKISI_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_pop(sk) ((OSSL_CMP_PKISI *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_shift(sk) ((OSSL_CMP_PKISI *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_PKISI_sk_type(sk),ossl_check_OSSL_CMP_PKISI_freefunc_type(freefunc)) +#define sk_OSSL_CMP_PKISI_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr), (idx)) +#define sk_OSSL_CMP_PKISI_set(sk, idx, ptr) ((OSSL_CMP_PKISI *)OPENSSL_sk_set(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (idx), ossl_check_OSSL_CMP_PKISI_type(ptr))) +#define sk_OSSL_CMP_PKISI_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr), pnum) +#define sk_OSSL_CMP_PKISI_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_dup(sk) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_PKISI_freefunc_type(freefunc))) +#define sk_OSSL_CMP_PKISI_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_PKISI_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp))) + +typedef struct ossl_cmp_certrepmessage_st OSSL_CMP_CERTREPMESSAGE; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTREPMESSAGE, OSSL_CMP_CERTREPMESSAGE, OSSL_CMP_CERTREPMESSAGE) +#define sk_OSSL_CMP_CERTREPMESSAGE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_value(sk, idx) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTREPMESSAGE_new(cmp) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTREPMESSAGE_new_null() ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTREPMESSAGE_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTREPMESSAGE_reserve(sk, n) 
OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTREPMESSAGE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_delete(sk, i) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTREPMESSAGE_delete_ptr(sk, ptr) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr))) +#define sk_OSSL_CMP_CERTREPMESSAGE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_pop(sk) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_shift(sk) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk),ossl_check_OSSL_CMP_CERTREPMESSAGE_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTREPMESSAGE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTREPMESSAGE_set(sk, idx, ptr) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr))) +#define sk_OSSL_CMP_CERTREPMESSAGE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr), pnum) +#define sk_OSSL_CMP_CERTREPMESSAGE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_dup(sk) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_CERTREPMESSAGE_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTREPMESSAGE_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTREPMESSAGE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp))) + +typedef struct ossl_cmp_pollrep_st OSSL_CMP_POLLREP; +typedef STACK_OF(OSSL_CMP_POLLREP) OSSL_CMP_POLLREPCONTENT; +typedef struct ossl_cmp_certresponse_st OSSL_CMP_CERTRESPONSE; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTRESPONSE, OSSL_CMP_CERTRESPONSE, OSSL_CMP_CERTRESPONSE) +#define sk_OSSL_CMP_CERTRESPONSE_num(sk) 
OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_value(sk, idx) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTRESPONSE_new(cmp) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTRESPONSE_new_null() ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTRESPONSE_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTRESPONSE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTRESPONSE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_delete(sk, i) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTRESPONSE_delete_ptr(sk, ptr) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr))) +#define sk_OSSL_CMP_CERTRESPONSE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_pop(sk) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_shift(sk) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk),ossl_check_OSSL_CMP_CERTRESPONSE_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTRESPONSE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTRESPONSE_set(sk, idx, ptr) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr))) +#define sk_OSSL_CMP_CERTRESPONSE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr), pnum) +#define sk_OSSL_CMP_CERTRESPONSE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_dup(sk) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_copyfunc_type(copyfunc), 
ossl_check_OSSL_CMP_CERTRESPONSE_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTRESPONSE_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTRESPONSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp))) + +typedef STACK_OF(ASN1_UTF8STRING) OSSL_CMP_PKIFREETEXT; + +/* + * function DECLARATIONS + */ + +/* from cmp_asn.c */ +OSSL_CMP_ITAV *OSSL_CMP_ITAV_create(ASN1_OBJECT *type, ASN1_TYPE *value); +void OSSL_CMP_ITAV_set0(OSSL_CMP_ITAV *itav, ASN1_OBJECT *type, + ASN1_TYPE *value); +ASN1_OBJECT *OSSL_CMP_ITAV_get0_type(const OSSL_CMP_ITAV *itav); +ASN1_TYPE *OSSL_CMP_ITAV_get0_value(const OSSL_CMP_ITAV *itav); +int OSSL_CMP_ITAV_push0_stack_item(STACK_OF(OSSL_CMP_ITAV) **itav_sk_p, + OSSL_CMP_ITAV *itav); +void OSSL_CMP_ITAV_free(OSSL_CMP_ITAV *itav); + +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_caCerts(const STACK_OF(X509) *caCerts); +int OSSL_CMP_ITAV_get0_caCerts(const OSSL_CMP_ITAV *itav, STACK_OF(X509) **out); + +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_rootCaCert(const X509 *rootCaCert); +int OSSL_CMP_ITAV_get0_rootCaCert(const OSSL_CMP_ITAV *itav, X509 **out); +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_rootCaKeyUpdate(const X509 *newWithNew, + const X509 *newWithOld, + const X509 *oldWithNew); +int OSSL_CMP_ITAV_get0_rootCaKeyUpdate(const OSSL_CMP_ITAV *itav, + X509 **newWithNew, + X509 **newWithOld, + X509 **oldWithNew); + +void OSSL_CMP_MSG_free(OSSL_CMP_MSG *msg); + +/* from cmp_ctx.c */ +OSSL_CMP_CTX *OSSL_CMP_CTX_new(OSSL_LIB_CTX *libctx, const char *propq); +void OSSL_CMP_CTX_free(OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_reinit(OSSL_CMP_CTX *ctx); +OSSL_LIB_CTX *OSSL_CMP_CTX_get0_libctx(const OSSL_CMP_CTX *ctx); +const char *OSSL_CMP_CTX_get0_propq(const OSSL_CMP_CTX *ctx); +/* CMP general options: */ +# define OSSL_CMP_OPT_LOG_VERBOSITY 0 +/* CMP transfer options: */ +# define OSSL_CMP_OPT_KEEP_ALIVE 10 +# define OSSL_CMP_OPT_MSG_TIMEOUT 11 +# define OSSL_CMP_OPT_TOTAL_TIMEOUT 12 +# define OSSL_CMP_OPT_USE_TLS 13 +/* CMP request options: */ +# define OSSL_CMP_OPT_VALIDITY_DAYS 20 +# define OSSL_CMP_OPT_SUBJECTALTNAME_NODEFAULT 21 +# define OSSL_CMP_OPT_SUBJECTALTNAME_CRITICAL 22 +# define OSSL_CMP_OPT_POLICIES_CRITICAL 23 +# define OSSL_CMP_OPT_POPO_METHOD 24 +# define OSSL_CMP_OPT_IMPLICIT_CONFIRM 25 +# define OSSL_CMP_OPT_DISABLE_CONFIRM 26 +# define OSSL_CMP_OPT_REVOCATION_REASON 27 +/* CMP protection options: */ +# define OSSL_CMP_OPT_UNPROTECTED_SEND 30 +# define OSSL_CMP_OPT_UNPROTECTED_ERRORS 31 +# define OSSL_CMP_OPT_OWF_ALGNID 32 +# define OSSL_CMP_OPT_MAC_ALGNID 33 +# define OSSL_CMP_OPT_DIGEST_ALGNID 34 +# define OSSL_CMP_OPT_IGNORE_KEYUSAGE 35 +# define OSSL_CMP_OPT_PERMIT_TA_IN_EXTRACERTS_FOR_IR 36 +int OSSL_CMP_CTX_set_option(OSSL_CMP_CTX *ctx, int opt, int val); +int OSSL_CMP_CTX_get_option(const OSSL_CMP_CTX *ctx, int opt); +/* CMP-specific callback for logging and outputting the error queue: */ +int OSSL_CMP_CTX_set_log_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_log_cb_t cb); +# define OSSL_CMP_CTX_set_log_verbosity(ctx, level) \ + OSSL_CMP_CTX_set_option(ctx, OSSL_CMP_OPT_LOG_VERBOSITY, level) +void OSSL_CMP_CTX_print_errors(const OSSL_CMP_CTX *ctx); +/* message transfer: */ +int OSSL_CMP_CTX_set1_serverPath(OSSL_CMP_CTX *ctx, const char *path); +int OSSL_CMP_CTX_set1_server(OSSL_CMP_CTX *ctx, const char *address); +int OSSL_CMP_CTX_set_serverPort(OSSL_CMP_CTX *ctx, int port); +int OSSL_CMP_CTX_set1_proxy(OSSL_CMP_CTX *ctx, const char *name); +int OSSL_CMP_CTX_set1_no_proxy(OSSL_CMP_CTX *ctx, const char *names); +# ifndef 
OPENSSL_NO_HTTP +int OSSL_CMP_CTX_set_http_cb(OSSL_CMP_CTX *ctx, OSSL_HTTP_bio_cb_t cb); +int OSSL_CMP_CTX_set_http_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_http_cb_arg(const OSSL_CMP_CTX *ctx); +# endif +typedef OSSL_CMP_MSG *(*OSSL_CMP_transfer_cb_t) (OSSL_CMP_CTX *ctx, + const OSSL_CMP_MSG *req); +int OSSL_CMP_CTX_set_transfer_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_transfer_cb_t cb); +int OSSL_CMP_CTX_set_transfer_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_transfer_cb_arg(const OSSL_CMP_CTX *ctx); +/* server authentication: */ +int OSSL_CMP_CTX_set1_srvCert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_set1_expected_sender(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_set0_trustedStore(OSSL_CMP_CTX *ctx, X509_STORE *store); +# define OSSL_CMP_CTX_set0_trusted OSSL_CMP_CTX_set0_trustedStore +X509_STORE *OSSL_CMP_CTX_get0_trustedStore(const OSSL_CMP_CTX *ctx); +# define OSSL_CMP_CTX_get0_trusted OSSL_CMP_CTX_get0_trustedStore +int OSSL_CMP_CTX_set1_untrusted(OSSL_CMP_CTX *ctx, STACK_OF(X509) *certs); +STACK_OF(X509) *OSSL_CMP_CTX_get0_untrusted(const OSSL_CMP_CTX *ctx); +/* client authentication: */ +int OSSL_CMP_CTX_set1_cert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, + STACK_OF(X509) *candidates); +int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); +int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, + const unsigned char *ref, int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); +/* CMP message header and extra certificates: */ +int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, + STACK_OF(X509) *extraCertsOut); +/* certificate template: */ +int OSSL_CMP_CTX_set0_newPkey(OSSL_CMP_CTX *ctx, int priv, EVP_PKEY *pkey); +EVP_PKEY *OSSL_CMP_CTX_get0_newPkey(const OSSL_CMP_CTX *ctx, int priv); +int OSSL_CMP_CTX_set1_issuer(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_set1_serialNumber(OSSL_CMP_CTX *ctx, const ASN1_INTEGER *sn); +int OSSL_CMP_CTX_set1_subjectName(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_push1_subjectAltName(OSSL_CMP_CTX *ctx, + const GENERAL_NAME *name); +int OSSL_CMP_CTX_set0_reqExtensions(OSSL_CMP_CTX *ctx, X509_EXTENSIONS *exts); +int OSSL_CMP_CTX_reqExtensions_have_SAN(OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_push0_policy(OSSL_CMP_CTX *ctx, POLICYINFO *pinfo); +int OSSL_CMP_CTX_set1_oldCert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_set1_p10CSR(OSSL_CMP_CTX *ctx, const X509_REQ *csr); +/* misc body contents: */ +int OSSL_CMP_CTX_push0_genm_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +/* certificate confirmation: */ +typedef int (*OSSL_CMP_certConf_cb_t) (OSSL_CMP_CTX *ctx, X509 *cert, + int fail_info, const char **txt); +int OSSL_CMP_certConf_cb(OSSL_CMP_CTX *ctx, X509 *cert, int fail_info, + const char **text); +int OSSL_CMP_CTX_set_certConf_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_certConf_cb_t cb); +int OSSL_CMP_CTX_set_certConf_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_certConf_cb_arg(const OSSL_CMP_CTX *ctx); +/* result fetching: */ +int OSSL_CMP_CTX_get_status(const OSSL_CMP_CTX *ctx); +OSSL_CMP_PKIFREETEXT *OSSL_CMP_CTX_get0_statusString(const OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_get_failInfoCode(const OSSL_CMP_CTX 
*ctx); +# define OSSL_CMP_PKISI_BUFLEN 1024 +X509 *OSSL_CMP_CTX_get0_validatedSrvCert(const OSSL_CMP_CTX *ctx); +X509 *OSSL_CMP_CTX_get0_newCert(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_newChain(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_caPubs(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_extraCertsIn(const OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_set1_transactionID(OSSL_CMP_CTX *ctx, + const ASN1_OCTET_STRING *id); +int OSSL_CMP_CTX_set1_senderNonce(OSSL_CMP_CTX *ctx, + const ASN1_OCTET_STRING *nonce); + +/* from cmp_status.c */ +char *OSSL_CMP_CTX_snprint_PKIStatus(const OSSL_CMP_CTX *ctx, char *buf, + size_t bufsize); +char *OSSL_CMP_snprint_PKIStatusInfo(const OSSL_CMP_PKISI *statusInfo, + char *buf, size_t bufsize); +OSSL_CMP_PKISI * +OSSL_CMP_STATUSINFO_new(int status, int fail_info, const char *text); + +/* from cmp_hdr.c */ +ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_transactionID(const + OSSL_CMP_PKIHEADER *hdr); +ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); + +/* from cmp_msg.c */ +OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); +OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, + const char *propq); +int OSSL_CMP_MSG_write(const char *file, const OSSL_CMP_MSG *msg); +OSSL_CMP_MSG *d2i_OSSL_CMP_MSG_bio(BIO *bio, OSSL_CMP_MSG **msg); +int i2d_OSSL_CMP_MSG_bio(BIO *bio, const OSSL_CMP_MSG *msg); + +/* from cmp_vfy.c */ +int OSSL_CMP_validate_msg(OSSL_CMP_CTX *ctx, const OSSL_CMP_MSG *msg); +int OSSL_CMP_validate_cert_path(const OSSL_CMP_CTX *ctx, + X509_STORE *trusted_store, X509 *cert); + +/* from cmp_http.c */ +# ifndef OPENSSL_NO_HTTP +OSSL_CMP_MSG *OSSL_CMP_MSG_http_perform(OSSL_CMP_CTX *ctx, + const OSSL_CMP_MSG *req); +# endif + +/* from cmp_server.c */ +typedef struct ossl_cmp_srv_ctx_st OSSL_CMP_SRV_CTX; +OSSL_CMP_MSG *OSSL_CMP_SRV_process_request(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req); +OSSL_CMP_MSG * OSSL_CMP_CTX_server_perform(OSSL_CMP_CTX *client_ctx, + const OSSL_CMP_MSG *req); +OSSL_CMP_SRV_CTX *OSSL_CMP_SRV_CTX_new(OSSL_LIB_CTX *libctx, const char *propq); +void OSSL_CMP_SRV_CTX_free(OSSL_CMP_SRV_CTX *srv_ctx); +typedef OSSL_CMP_PKISI *(*OSSL_CMP_SRV_cert_request_cb_t) + (OSSL_CMP_SRV_CTX *srv_ctx, const OSSL_CMP_MSG *req, int certReqId, + const OSSL_CRMF_MSG *crm, const X509_REQ *p10cr, + X509 **certOut, STACK_OF(X509) **chainOut, STACK_OF(X509) **caPubs); +typedef OSSL_CMP_PKISI *(*OSSL_CMP_SRV_rr_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const X509_NAME *issuer, + const ASN1_INTEGER *serial); +typedef int (*OSSL_CMP_SRV_genm_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const STACK_OF(OSSL_CMP_ITAV) *in, + STACK_OF(OSSL_CMP_ITAV) **out); +typedef void (*OSSL_CMP_SRV_error_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const OSSL_CMP_PKISI *statusInfo, + const ASN1_INTEGER *errorCode, + const OSSL_CMP_PKIFREETEXT *errDetails); +typedef int (*OSSL_CMP_SRV_certConf_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + int certReqId, + const ASN1_OCTET_STRING *certHash, + const OSSL_CMP_PKISI *si); +typedef int (*OSSL_CMP_SRV_pollReq_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG 
*req, int certReqId, + OSSL_CMP_MSG **certReq, + int64_t *check_after); +int OSSL_CMP_SRV_CTX_init(OSSL_CMP_SRV_CTX *srv_ctx, void *custom_ctx, + OSSL_CMP_SRV_cert_request_cb_t process_cert_request, + OSSL_CMP_SRV_rr_cb_t process_rr, + OSSL_CMP_SRV_genm_cb_t process_genm, + OSSL_CMP_SRV_error_cb_t process_error, + OSSL_CMP_SRV_certConf_cb_t process_certConf, + OSSL_CMP_SRV_pollReq_cb_t process_pollReq); +OSSL_CMP_CTX *OSSL_CMP_SRV_CTX_get0_cmp_ctx(const OSSL_CMP_SRV_CTX *srv_ctx); +void *OSSL_CMP_SRV_CTX_get0_custom_ctx(const OSSL_CMP_SRV_CTX *srv_ctx); +int OSSL_CMP_SRV_CTX_set_send_unprotected_errors(OSSL_CMP_SRV_CTX *srv_ctx, + int val); +int OSSL_CMP_SRV_CTX_set_accept_unprotected(OSSL_CMP_SRV_CTX *srv_ctx, int val); +int OSSL_CMP_SRV_CTX_set_accept_raverified(OSSL_CMP_SRV_CTX *srv_ctx, int val); +int OSSL_CMP_SRV_CTX_set_grant_implicit_confirm(OSSL_CMP_SRV_CTX *srv_ctx, + int val); + +/* from cmp_client.c */ +X509 *OSSL_CMP_exec_certreq(OSSL_CMP_CTX *ctx, int req_type, + const OSSL_CRMF_MSG *crm); +# define OSSL_CMP_IR 0 +# define OSSL_CMP_CR 2 +# define OSSL_CMP_P10CR 4 +# define OSSL_CMP_KUR 7 +# define OSSL_CMP_exec_IR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_IR, NULL) +# define OSSL_CMP_exec_CR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_CR, NULL) +# define OSSL_CMP_exec_P10CR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_P10CR, NULL) +# define OSSL_CMP_exec_KUR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_KUR, NULL) +int OSSL_CMP_try_certreq(OSSL_CMP_CTX *ctx, int req_type, + const OSSL_CRMF_MSG *crm, int *checkAfter); +int OSSL_CMP_exec_RR_ses(OSSL_CMP_CTX *ctx); +STACK_OF(OSSL_CMP_ITAV) *OSSL_CMP_exec_GENM_ses(OSSL_CMP_CTX *ctx); + +/* from cmp_genm.c */ +int OSSL_CMP_get1_caCerts(OSSL_CMP_CTX *ctx, STACK_OF(X509) **out); +int OSSL_CMP_get1_rootCaKeyUpdate(OSSL_CMP_CTX *ctx, + const X509 *oldWithOld, X509 **newWithNew, + X509 **newWithOld, X509 **oldWithNew); + +# ifdef __cplusplus +} +# endif +# endif /* !defined(OPENSSL_NO_CMP) */ +#endif /* !defined(OPENSSL_CMP_H) */ diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/cms.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cms.h new file mode 100644 index 00000000000..fe86a5c7da2 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cms.h @@ -0,0 +1,508 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/cms.h.in + * + * Copyright 2008-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+
+
+#ifndef OPENSSL_CMS_H
+# define OPENSSL_CMS_H
+# pragma once
+
+# include <openssl/macros.h>
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+# define HEADER_CMS_H
+# endif
+
+# include <openssl/opensslconf.h>
+
+# ifndef OPENSSL_NO_CMS
+# include <openssl/types.h>
+# include <openssl/x509.h>
+# include <openssl/x509v3.h>
+# ifdef __cplusplus
+extern "C" {
+# endif
+
+typedef struct CMS_EnvelopedData_st CMS_EnvelopedData;
+typedef struct CMS_ContentInfo_st CMS_ContentInfo;
+typedef struct CMS_SignerInfo_st CMS_SignerInfo;
+typedef struct CMS_SignedData_st CMS_SignedData;
+typedef struct CMS_CertificateChoices CMS_CertificateChoices;
+typedef struct CMS_RevocationInfoChoice_st CMS_RevocationInfoChoice;
+typedef struct CMS_RecipientInfo_st CMS_RecipientInfo;
+typedef struct CMS_ReceiptRequest_st CMS_ReceiptRequest;
+typedef struct CMS_Receipt_st CMS_Receipt;
+typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey;
+typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute;
+
+SKM_DEFINE_STACK_OF_INTERNAL(CMS_SignerInfo, CMS_SignerInfo, CMS_SignerInfo)
+#define sk_CMS_SignerInfo_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_SignerInfo_sk_type(sk))
+#define sk_CMS_SignerInfo_value(sk, idx) ((CMS_SignerInfo *)OPENSSL_sk_value(ossl_check_const_CMS_SignerInfo_sk_type(sk), (idx)))
+#define sk_CMS_SignerInfo_new(cmp) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new(ossl_check_CMS_SignerInfo_compfunc_type(cmp)))
+#define sk_CMS_SignerInfo_new_null() ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new_null())
+#define sk_CMS_SignerInfo_new_reserve(cmp, n) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new_reserve(ossl_check_CMS_SignerInfo_compfunc_type(cmp), (n)))
+#define sk_CMS_SignerInfo_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_SignerInfo_sk_type(sk), (n))
+#define sk_CMS_SignerInfo_free(sk) OPENSSL_sk_free(ossl_check_CMS_SignerInfo_sk_type(sk))
+#define sk_CMS_SignerInfo_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_SignerInfo_sk_type(sk))
+#define sk_CMS_SignerInfo_delete(sk, i) ((CMS_SignerInfo *)OPENSSL_sk_delete(ossl_check_CMS_SignerInfo_sk_type(sk), (i)))
+#define sk_CMS_SignerInfo_delete_ptr(sk, ptr) ((CMS_SignerInfo *)OPENSSL_sk_delete_ptr(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)))
+#define sk_CMS_SignerInfo_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr))
+#define sk_CMS_SignerInfo_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr))
+#define sk_CMS_SignerInfo_pop(sk) ((CMS_SignerInfo *)OPENSSL_sk_pop(ossl_check_CMS_SignerInfo_sk_type(sk)))
+#define sk_CMS_SignerInfo_shift(sk) ((CMS_SignerInfo *)OPENSSL_sk_shift(ossl_check_CMS_SignerInfo_sk_type(sk)))
+#define sk_CMS_SignerInfo_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_SignerInfo_sk_type(sk),ossl_check_CMS_SignerInfo_freefunc_type(freefunc))
+#define sk_CMS_SignerInfo_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr), (idx))
+#define sk_CMS_SignerInfo_set(sk, idx, ptr) ((CMS_SignerInfo *)OPENSSL_sk_set(ossl_check_CMS_SignerInfo_sk_type(sk), (idx), ossl_check_CMS_SignerInfo_type(ptr)))
+#define sk_CMS_SignerInfo_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr))
+#define sk_CMS_SignerInfo_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr))
+#define
sk_CMS_SignerInfo_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr), pnum) +#define sk_CMS_SignerInfo_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_dup(sk) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_dup(ossl_check_const_CMS_SignerInfo_sk_type(sk))) +#define sk_CMS_SignerInfo_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_copyfunc_type(copyfunc), ossl_check_CMS_SignerInfo_freefunc_type(freefunc))) +#define sk_CMS_SignerInfo_set_cmp_func(sk, cmp) ((sk_CMS_SignerInfo_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey) +#define sk_CMS_RecipientEncryptedKey_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_value(sk, idx) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_value(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk), (idx))) +#define sk_CMS_RecipientEncryptedKey_new(cmp) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new(ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp))) +#define sk_CMS_RecipientEncryptedKey_new_null() ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new_null()) +#define sk_CMS_RecipientEncryptedKey_new_reserve(cmp, n) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp), (n))) +#define sk_CMS_RecipientEncryptedKey_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (n)) +#define sk_CMS_RecipientEncryptedKey_free(sk) OPENSSL_sk_free(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_delete(sk, i) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_delete(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (i))) +#define sk_CMS_RecipientEncryptedKey_delete_ptr(sk, ptr) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr))) +#define sk_CMS_RecipientEncryptedKey_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_pop(sk) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_pop(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_shift(sk) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_shift(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk),ossl_check_CMS_RecipientEncryptedKey_freefunc_type(freefunc)) +#define sk_CMS_RecipientEncryptedKey_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr), (idx)) +#define sk_CMS_RecipientEncryptedKey_set(sk, idx, ptr) 
((CMS_RecipientEncryptedKey *)OPENSSL_sk_set(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (idx), ossl_check_CMS_RecipientEncryptedKey_type(ptr))) +#define sk_CMS_RecipientEncryptedKey_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr), pnum) +#define sk_CMS_RecipientEncryptedKey_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_dup(sk) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_dup(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_copyfunc_type(copyfunc), ossl_check_CMS_RecipientEncryptedKey_freefunc_type(freefunc))) +#define sk_CMS_RecipientEncryptedKey_set_cmp_func(sk, cmp) ((sk_CMS_RecipientEncryptedKey_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RecipientInfo, CMS_RecipientInfo, CMS_RecipientInfo) +#define sk_CMS_RecipientInfo_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_value(sk, idx) ((CMS_RecipientInfo *)OPENSSL_sk_value(ossl_check_const_CMS_RecipientInfo_sk_type(sk), (idx))) +#define sk_CMS_RecipientInfo_new(cmp) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new(ossl_check_CMS_RecipientInfo_compfunc_type(cmp))) +#define sk_CMS_RecipientInfo_new_null() ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new_null()) +#define sk_CMS_RecipientInfo_new_reserve(cmp, n) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RecipientInfo_compfunc_type(cmp), (n))) +#define sk_CMS_RecipientInfo_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RecipientInfo_sk_type(sk), (n)) +#define sk_CMS_RecipientInfo_free(sk) OPENSSL_sk_free(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_delete(sk, i) ((CMS_RecipientInfo *)OPENSSL_sk_delete(ossl_check_CMS_RecipientInfo_sk_type(sk), (i))) +#define sk_CMS_RecipientInfo_delete_ptr(sk, ptr) ((CMS_RecipientInfo *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr))) +#define sk_CMS_RecipientInfo_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_pop(sk) ((CMS_RecipientInfo *)OPENSSL_sk_pop(ossl_check_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_shift(sk) ((CMS_RecipientInfo *)OPENSSL_sk_shift(ossl_check_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_CMS_RecipientInfo_sk_type(sk),ossl_check_CMS_RecipientInfo_freefunc_type(freefunc)) +#define sk_CMS_RecipientInfo_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr), (idx)) +#define sk_CMS_RecipientInfo_set(sk, idx, ptr) ((CMS_RecipientInfo *)OPENSSL_sk_set(ossl_check_CMS_RecipientInfo_sk_type(sk), (idx), ossl_check_CMS_RecipientInfo_type(ptr))) +#define sk_CMS_RecipientInfo_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr), pnum) +#define sk_CMS_RecipientInfo_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_dup(sk) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_dup(ossl_check_const_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_copyfunc_type(copyfunc), ossl_check_CMS_RecipientInfo_freefunc_type(freefunc))) +#define sk_CMS_RecipientInfo_set_cmp_func(sk, cmp) ((sk_CMS_RecipientInfo_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RevocationInfoChoice, CMS_RevocationInfoChoice, CMS_RevocationInfoChoice) +#define sk_CMS_RevocationInfoChoice_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_value(sk, idx) ((CMS_RevocationInfoChoice *)OPENSSL_sk_value(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk), (idx))) +#define sk_CMS_RevocationInfoChoice_new(cmp) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new(ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp))) +#define sk_CMS_RevocationInfoChoice_new_null() ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new_null()) +#define sk_CMS_RevocationInfoChoice_new_reserve(cmp, n) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp), (n))) +#define sk_CMS_RevocationInfoChoice_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (n)) +#define sk_CMS_RevocationInfoChoice_free(sk) OPENSSL_sk_free(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_delete(sk, i) ((CMS_RevocationInfoChoice *)OPENSSL_sk_delete(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (i))) +#define sk_CMS_RevocationInfoChoice_delete_ptr(sk, ptr) ((CMS_RevocationInfoChoice *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr))) +#define sk_CMS_RevocationInfoChoice_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), 
ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_pop(sk) ((CMS_RevocationInfoChoice *)OPENSSL_sk_pop(ossl_check_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_shift(sk) ((CMS_RevocationInfoChoice *)OPENSSL_sk_shift(ossl_check_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_RevocationInfoChoice_sk_type(sk),ossl_check_CMS_RevocationInfoChoice_freefunc_type(freefunc)) +#define sk_CMS_RevocationInfoChoice_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr), (idx)) +#define sk_CMS_RevocationInfoChoice_set(sk, idx, ptr) ((CMS_RevocationInfoChoice *)OPENSSL_sk_set(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (idx), ossl_check_CMS_RevocationInfoChoice_type(ptr))) +#define sk_CMS_RevocationInfoChoice_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr), pnum) +#define sk_CMS_RevocationInfoChoice_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_dup(sk) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_dup(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_copyfunc_type(copyfunc), ossl_check_CMS_RevocationInfoChoice_freefunc_type(freefunc))) +#define sk_CMS_RevocationInfoChoice_set_cmp_func(sk, cmp) ((sk_CMS_RevocationInfoChoice_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp))) + + +DECLARE_ASN1_ITEM(CMS_EnvelopedData) +DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_SignedData) +DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo) +DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest) +DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) + +CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +# define CMS_SIGNERINFO_ISSUER_SERIAL 0 +# define CMS_SIGNERINFO_KEYIDENTIFIER 1 + +# define CMS_RECIPINFO_NONE -1 +# define CMS_RECIPINFO_TRANS 0 +# define CMS_RECIPINFO_AGREE 1 +# define CMS_RECIPINFO_KEK 2 +# define CMS_RECIPINFO_PASS 3 +# define CMS_RECIPINFO_OTHER 4 + +/* S/MIME related flags */ + +# define CMS_TEXT 0x1 +# define CMS_NOCERTS 0x2 +# define CMS_NO_CONTENT_VERIFY 0x4 +# define CMS_NO_ATTR_VERIFY 0x8 +# define CMS_NOSIGS \ + (CMS_NO_CONTENT_VERIFY|CMS_NO_ATTR_VERIFY) +# define CMS_NOINTERN 0x10 +# define CMS_NO_SIGNER_CERT_VERIFY 0x20 +# define CMS_NOVERIFY 0x20 +# define CMS_DETACHED 0x40 +# define CMS_BINARY 0x80 +# define CMS_NOATTR 0x100 +# define CMS_NOSMIMECAP 0x200 +# define CMS_NOOLDMIMETYPE 0x400 +# define CMS_CRLFEOL 0x800 +# define CMS_STREAM 0x1000 +# define CMS_NOCRL 0x2000 +# define CMS_PARTIAL 0x4000 +# define CMS_REUSE_DIGEST 0x8000 +# define CMS_USE_KEYID 
0x10000 +# define CMS_DEBUG_DECRYPT 0x20000 +# define CMS_KEY_PARAM 0x40000 +# define CMS_ASCIICRLF 0x80000 +# define CMS_CADES 0x100000 +# define CMS_USE_ORIGINATOR_KEYID 0x200000 + +const ASN1_OBJECT *CMS_get0_type(const CMS_ContentInfo *cms); + +BIO *CMS_dataInit(CMS_ContentInfo *cms, BIO *icont); +int CMS_dataFinal(CMS_ContentInfo *cms, BIO *bio); + +ASN1_OCTET_STRING **CMS_get0_content(CMS_ContentInfo *cms); +int CMS_is_detached(CMS_ContentInfo *cms); +int CMS_set_detached(CMS_ContentInfo *cms, int detached); + +# ifdef OPENSSL_PEM_H +DECLARE_PEM_rw(CMS, CMS_ContentInfo) +# endif +int CMS_stream(unsigned char ***boundary, CMS_ContentInfo *cms); +CMS_ContentInfo *d2i_CMS_bio(BIO *bp, CMS_ContentInfo **cms); +int i2d_CMS_bio(BIO *bp, CMS_ContentInfo *cms); + +BIO *BIO_new_CMS(BIO *out, CMS_ContentInfo *cms); +int i2d_CMS_bio_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags); +int PEM_write_bio_CMS_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, + int flags); +CMS_ContentInfo *SMIME_read_CMS(BIO *bio, BIO **bcont); +CMS_ContentInfo *SMIME_read_CMS_ex(BIO *bio, int flags, BIO **bcont, CMS_ContentInfo **ci); +int SMIME_write_CMS(BIO *bio, CMS_ContentInfo *cms, BIO *data, int flags); + +int CMS_final(CMS_ContentInfo *cms, BIO *data, BIO *dcont, + unsigned int flags); +int CMS_final_digest(CMS_ContentInfo *cms, + const unsigned char *md, unsigned int mdlen, BIO *dcont, + unsigned int flags); + +CMS_ContentInfo *CMS_sign(X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, BIO *data, + unsigned int flags); +CMS_ContentInfo *CMS_sign_ex(X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, BIO *data, + unsigned int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +CMS_ContentInfo *CMS_sign_receipt(CMS_SignerInfo *si, + X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, unsigned int flags); + +int CMS_data(CMS_ContentInfo *cms, BIO *out, unsigned int flags); +CMS_ContentInfo *CMS_data_create(BIO *in, unsigned int flags); +CMS_ContentInfo *CMS_data_create_ex(BIO *in, unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_digest_verify(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_digest_create(BIO *in, const EVP_MD *md, + unsigned int flags); +CMS_ContentInfo *CMS_digest_create_ex(BIO *in, const EVP_MD *md, + unsigned int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +int CMS_EncryptedData_decrypt(CMS_ContentInfo *cms, + const unsigned char *key, size_t keylen, + BIO *dcont, BIO *out, unsigned int flags); +CMS_ContentInfo *CMS_EncryptedData_encrypt(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, + size_t keylen, unsigned int flags); +CMS_ContentInfo *CMS_EncryptedData_encrypt_ex(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, + size_t keylen, unsigned int flags, + OSSL_LIB_CTX *libctx, + const char *propq); + +int CMS_EncryptedData_set1_key(CMS_ContentInfo *cms, const EVP_CIPHER *ciph, + const unsigned char *key, size_t keylen); + +int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + X509_STORE *store, BIO *dcont, BIO *out, unsigned int flags); + +int CMS_verify_receipt(CMS_ContentInfo *rcms, CMS_ContentInfo *ocms, + STACK_OF(X509) *certs, + X509_STORE *store, unsigned int flags); + +STACK_OF(X509) *CMS_get0_signers(CMS_ContentInfo *cms); + +CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, unsigned int flags); +CMS_ContentInfo *CMS_encrypt_ex(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, unsigned int flags, + 
OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pkey, X509 *cert, + BIO *dcont, BIO *out, unsigned int flags); + +int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert); +int CMS_decrypt_set1_pkey_and_peer(CMS_ContentInfo *cms, EVP_PKEY *pk, + X509 *cert, X509 *peer); +int CMS_decrypt_set1_key(CMS_ContentInfo *cms, + unsigned char *key, size_t keylen, + const unsigned char *id, size_t idlen); +int CMS_decrypt_set1_password(CMS_ContentInfo *cms, + unsigned char *pass, ossl_ssize_t passlen); + +STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms); +int CMS_RecipientInfo_type(CMS_RecipientInfo *ri); +EVP_PKEY_CTX *CMS_RecipientInfo_get0_pkey_ctx(CMS_RecipientInfo *ri); +CMS_ContentInfo *CMS_AuthEnvelopedData_create(const EVP_CIPHER *cipher); +CMS_ContentInfo * +CMS_AuthEnvelopedData_create_ex(const EVP_CIPHER *cipher, OSSL_LIB_CTX *libctx, + const char *propq); +CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher); +CMS_ContentInfo *CMS_EnvelopedData_create_ex(const EVP_CIPHER *cipher, + OSSL_LIB_CTX *libctx, + const char *propq); +BIO *CMS_EnvelopedData_decrypt(CMS_EnvelopedData *env, BIO *detached_data, + EVP_PKEY *pkey, X509 *cert, + ASN1_OCTET_STRING *secret, unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, + X509 *recip, unsigned int flags); +CMS_RecipientInfo *CMS_add1_recipient(CMS_ContentInfo *cms, X509 *recip, + EVP_PKEY *originatorPrivKey, X509 * originator, unsigned int flags); +int CMS_RecipientInfo_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pkey); +int CMS_RecipientInfo_ktri_cert_cmp(CMS_RecipientInfo *ri, X509 *cert); +int CMS_RecipientInfo_ktri_get0_algs(CMS_RecipientInfo *ri, + EVP_PKEY **pk, X509 **recip, + X509_ALGOR **palg); +int CMS_RecipientInfo_ktri_get0_signer_id(CMS_RecipientInfo *ri, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno); + +CMS_RecipientInfo *CMS_add0_recipient_key(CMS_ContentInfo *cms, int nid, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen, + ASN1_GENERALIZEDTIME *date, + ASN1_OBJECT *otherTypeId, + ASN1_TYPE *otherType); + +int CMS_RecipientInfo_kekri_get0_id(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pid, + ASN1_GENERALIZEDTIME **pdate, + ASN1_OBJECT **potherid, + ASN1_TYPE **pothertype); + +int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri, + unsigned char *key, size_t keylen); + +int CMS_RecipientInfo_kekri_id_cmp(CMS_RecipientInfo *ri, + const unsigned char *id, size_t idlen); + +int CMS_RecipientInfo_set0_password(CMS_RecipientInfo *ri, + unsigned char *pass, + ossl_ssize_t passlen); + +CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, + int iter, int wrap_nid, + int pbe_nid, + unsigned char *pass, + ossl_ssize_t passlen, + const EVP_CIPHER *kekciph); + +int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); +int CMS_RecipientInfo_encrypt(const CMS_ContentInfo *cms, CMS_RecipientInfo *ri); + +int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_compress(BIO *in, int comp_nid, unsigned int flags); + +int CMS_set1_eContentType(CMS_ContentInfo *cms, const ASN1_OBJECT *oid); +const ASN1_OBJECT *CMS_get0_eContentType(CMS_ContentInfo *cms); + +CMS_CertificateChoices *CMS_add0_CertificateChoices(CMS_ContentInfo *cms); +int CMS_add0_cert(CMS_ContentInfo *cms, X509 *cert); +int CMS_add1_cert(CMS_ContentInfo 
*cms, X509 *cert); +STACK_OF(X509) *CMS_get1_certs(CMS_ContentInfo *cms); + +CMS_RevocationInfoChoice *CMS_add0_RevocationInfoChoice(CMS_ContentInfo *cms); +int CMS_add0_crl(CMS_ContentInfo *cms, X509_CRL *crl); +int CMS_add1_crl(CMS_ContentInfo *cms, X509_CRL *crl); +STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms); + +int CMS_SignedData_init(CMS_ContentInfo *cms); +CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, + X509 *signer, EVP_PKEY *pk, const EVP_MD *md, + unsigned int flags); +EVP_PKEY_CTX *CMS_SignerInfo_get0_pkey_ctx(CMS_SignerInfo *si); +EVP_MD_CTX *CMS_SignerInfo_get0_md_ctx(CMS_SignerInfo *si); +STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms); + +void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer); +int CMS_SignerInfo_get0_signer_id(CMS_SignerInfo *si, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_SignerInfo_cert_cmp(CMS_SignerInfo *si, X509 *cert); +int CMS_set1_signers_certs(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + unsigned int flags); +void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, + X509 **signer, X509_ALGOR **pdig, + X509_ALGOR **psig); +ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si); +int CMS_SignerInfo_sign(CMS_SignerInfo *si); +int CMS_SignerInfo_verify(CMS_SignerInfo *si); +int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain); +BIO *CMS_SignedData_verify(CMS_SignedData *sd, BIO *detached_data, + STACK_OF(X509) *scerts, X509_STORE *store, + STACK_OF(X509) *extra, STACK_OF(X509_CRL) *crls, + unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_add_smimecap(CMS_SignerInfo *si, STACK_OF(X509_ALGOR) *algs); +int CMS_add_simple_smimecap(STACK_OF(X509_ALGOR) **algs, + int algnid, int keysize); +int CMS_add_standard_smimecap(STACK_OF(X509_ALGOR) **smcap); + +int CMS_signed_get_attr_count(const CMS_SignerInfo *si); +int CMS_signed_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_signed_get_attr_by_OBJ(const CMS_SignerInfo *si, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *CMS_signed_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_signed_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_signed_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_signed_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len); +void *CMS_signed_get0_data_by_OBJ(const CMS_SignerInfo *si, + const ASN1_OBJECT *oid, + int lastpos, int type); + +int CMS_unsigned_get_attr_count(const CMS_SignerInfo *si); +int CMS_unsigned_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_unsigned_get_attr_by_OBJ(const CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int lastpos); +X509_ATTRIBUTE *CMS_unsigned_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_unsigned_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_unsigned_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_unsigned_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int 
type, + const void *bytes, int len); +void *CMS_unsigned_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type); + +int CMS_get1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest **prr); +CMS_ReceiptRequest *CMS_ReceiptRequest_create0( + unsigned char *id, int idlen, int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo); +CMS_ReceiptRequest *CMS_ReceiptRequest_create0_ex( + unsigned char *id, int idlen, int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo, + OSSL_LIB_CTX *libctx); + +int CMS_add1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest *rr); +void CMS_ReceiptRequest_get0_values(CMS_ReceiptRequest *rr, + ASN1_STRING **pcid, + int *pallorfirst, + STACK_OF(GENERAL_NAMES) **plist, + STACK_OF(GENERAL_NAMES) **prto); +int CMS_RecipientInfo_kari_get0_alg(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pukm); +STACK_OF(CMS_RecipientEncryptedKey) +*CMS_RecipientInfo_kari_get0_reks(CMS_RecipientInfo *ri); + +int CMS_RecipientInfo_kari_get0_orig_id(CMS_RecipientInfo *ri, + X509_ALGOR **pubalg, + ASN1_BIT_STRING **pubkey, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno); + +int CMS_RecipientInfo_kari_orig_id_cmp(CMS_RecipientInfo *ri, X509 *cert); + +int CMS_RecipientEncryptedKey_get0_id(CMS_RecipientEncryptedKey *rek, + ASN1_OCTET_STRING **keyid, + ASN1_GENERALIZEDTIME **tm, + CMS_OtherKeyAttribute **other, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_RecipientEncryptedKey_cert_cmp(CMS_RecipientEncryptedKey *rek, + X509 *cert); +int CMS_RecipientInfo_kari_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pk); +int CMS_RecipientInfo_kari_set0_pkey_and_peer(CMS_RecipientInfo *ri, EVP_PKEY *pk, X509 *peer); +EVP_CIPHER_CTX *CMS_RecipientInfo_kari_get0_ctx(CMS_RecipientInfo *ri); +int CMS_RecipientInfo_kari_decrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri, + CMS_RecipientEncryptedKey *rek); + +int CMS_SharedInfo_encode(unsigned char **pder, X509_ALGOR *kekalg, + ASN1_OCTET_STRING *ukm, int keylen); + +/* Backward compatibility for spelling errors. */ +# define CMS_R_UNKNOWN_DIGEST_ALGORITM CMS_R_UNKNOWN_DIGEST_ALGORITHM +# define CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE \ + CMS_R_UNSUPPORTED_RECIPIENTINFO_TYPE + +# ifdef __cplusplus +} +# endif +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/conf.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/conf.h new file mode 100644 index 00000000000..61bb008770f --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/conf.h @@ -0,0 +1,214 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/conf.h.in + * + * Copyright 1995-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CONF_H +# define OPENSSL_CONF_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CONF_H +# endif + +# include +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + char *section; + char *name; + char *value; +} CONF_VALUE; + +SKM_DEFINE_STACK_OF_INTERNAL(CONF_VALUE, CONF_VALUE, CONF_VALUE) +#define sk_CONF_VALUE_num(sk) OPENSSL_sk_num(ossl_check_const_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_value(sk, idx) ((CONF_VALUE *)OPENSSL_sk_value(ossl_check_const_CONF_VALUE_sk_type(sk), (idx))) +#define sk_CONF_VALUE_new(cmp) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new(ossl_check_CONF_VALUE_compfunc_type(cmp))) +#define sk_CONF_VALUE_new_null() ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new_null()) +#define sk_CONF_VALUE_new_reserve(cmp, n) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new_reserve(ossl_check_CONF_VALUE_compfunc_type(cmp), (n))) +#define sk_CONF_VALUE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CONF_VALUE_sk_type(sk), (n)) +#define sk_CONF_VALUE_free(sk) OPENSSL_sk_free(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_zero(sk) OPENSSL_sk_zero(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_delete(sk, i) ((CONF_VALUE *)OPENSSL_sk_delete(ossl_check_CONF_VALUE_sk_type(sk), (i))) +#define sk_CONF_VALUE_delete_ptr(sk, ptr) ((CONF_VALUE *)OPENSSL_sk_delete_ptr(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr))) +#define sk_CONF_VALUE_push(sk, ptr) OPENSSL_sk_push(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_pop(sk) ((CONF_VALUE *)OPENSSL_sk_pop(ossl_check_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_shift(sk) ((CONF_VALUE *)OPENSSL_sk_shift(ossl_check_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CONF_VALUE_sk_type(sk),ossl_check_CONF_VALUE_freefunc_type(freefunc)) +#define sk_CONF_VALUE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr), (idx)) +#define sk_CONF_VALUE_set(sk, idx, ptr) ((CONF_VALUE *)OPENSSL_sk_set(ossl_check_CONF_VALUE_sk_type(sk), (idx), ossl_check_CONF_VALUE_type(ptr))) +#define sk_CONF_VALUE_find(sk, ptr) OPENSSL_sk_find(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr), pnum) +#define sk_CONF_VALUE_sort(sk) OPENSSL_sk_sort(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_dup(sk) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_dup(ossl_check_const_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_deep_copy(ossl_check_const_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_copyfunc_type(copyfunc), ossl_check_CONF_VALUE_freefunc_type(freefunc))) +#define sk_CONF_VALUE_set_cmp_func(sk, cmp) 
((sk_CONF_VALUE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_compfunc_type(cmp))) +DEFINE_LHASH_OF_INTERNAL(CONF_VALUE); +#define lh_CONF_VALUE_new(hfn, cmp) ((LHASH_OF(CONF_VALUE) *)OPENSSL_LH_new(ossl_check_CONF_VALUE_lh_hashfunc_type(hfn), ossl_check_CONF_VALUE_lh_compfunc_type(cmp))) +#define lh_CONF_VALUE_free(lh) OPENSSL_LH_free(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_flush(lh) OPENSSL_LH_flush(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_insert(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_insert(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_delete(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_delete(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_const_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_retrieve(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_retrieve(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_const_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_error(lh) OPENSSL_LH_error(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_num_items(lh) OPENSSL_LH_num_items(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_CONF_VALUE_lh_type(lh), dl) +#define lh_CONF_VALUE_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_CONF_VALUE_lh_doallfunc_type(dfn)) + + +struct conf_st; +struct conf_method_st; +typedef struct conf_method_st CONF_METHOD; + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# include +# endif + +/* Module definitions */ +typedef struct conf_imodule_st CONF_IMODULE; +typedef struct conf_module_st CONF_MODULE; + +STACK_OF(CONF_MODULE); +STACK_OF(CONF_IMODULE); + +/* DSO module function typedefs */ +typedef int conf_init_func (CONF_IMODULE *md, const CONF *cnf); +typedef void conf_finish_func (CONF_IMODULE *md); + +# define CONF_MFLAGS_IGNORE_ERRORS 0x1 +# define CONF_MFLAGS_IGNORE_RETURN_CODES 0x2 +# define CONF_MFLAGS_SILENT 0x4 +# define CONF_MFLAGS_NO_DSO 0x8 +# define CONF_MFLAGS_IGNORE_MISSING_FILE 0x10 +# define CONF_MFLAGS_DEFAULT_SECTION 0x20 + +int CONF_set_default_method(CONF_METHOD *meth); +void CONF_set_nconf(CONF *conf, LHASH_OF(CONF_VALUE) *hash); +LHASH_OF(CONF_VALUE) *CONF_load(LHASH_OF(CONF_VALUE) *conf, const char *file, + long *eline); +# ifndef OPENSSL_NO_STDIO +LHASH_OF(CONF_VALUE) *CONF_load_fp(LHASH_OF(CONF_VALUE) *conf, FILE *fp, + long *eline); +# endif +LHASH_OF(CONF_VALUE) *CONF_load_bio(LHASH_OF(CONF_VALUE) *conf, BIO *bp, + long *eline); +STACK_OF(CONF_VALUE) *CONF_get_section(LHASH_OF(CONF_VALUE) *conf, + const char *section); +char *CONF_get_string(LHASH_OF(CONF_VALUE) *conf, const char *group, + const char *name); +long CONF_get_number(LHASH_OF(CONF_VALUE) *conf, const char *group, + const char *name); +void CONF_free(LHASH_OF(CONF_VALUE) *conf); +#ifndef OPENSSL_NO_STDIO +int CONF_dump_fp(LHASH_OF(CONF_VALUE) *conf, FILE *out); +#endif +int CONF_dump_bio(LHASH_OF(CONF_VALUE) *conf, BIO *out); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 void OPENSSL_config(const char 
*config_name); +#endif + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define OPENSSL_no_config() \ + OPENSSL_init_crypto(OPENSSL_INIT_NO_LOAD_CONFIG, NULL) +#endif + +/* + * New conf code. The semantics are different from the functions above. If + * that wasn't the case, the above functions would have been replaced + */ + +CONF *NCONF_new_ex(OSSL_LIB_CTX *libctx, CONF_METHOD *meth); +OSSL_LIB_CTX *NCONF_get0_libctx(const CONF *conf); +CONF *NCONF_new(CONF_METHOD *meth); +CONF_METHOD *NCONF_default(void); +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 CONF_METHOD *NCONF_WIN32(void); +#endif +void NCONF_free(CONF *conf); +void NCONF_free_data(CONF *conf); + +int NCONF_load(CONF *conf, const char *file, long *eline); +# ifndef OPENSSL_NO_STDIO +int NCONF_load_fp(CONF *conf, FILE *fp, long *eline); +# endif +int NCONF_load_bio(CONF *conf, BIO *bp, long *eline); +STACK_OF(OPENSSL_CSTRING) *NCONF_get_section_names(const CONF *conf); +STACK_OF(CONF_VALUE) *NCONF_get_section(const CONF *conf, + const char *section); +char *NCONF_get_string(const CONF *conf, const char *group, const char *name); +int NCONF_get_number_e(const CONF *conf, const char *group, const char *name, + long *result); +#ifndef OPENSSL_NO_STDIO +int NCONF_dump_fp(const CONF *conf, FILE *out); +#endif +int NCONF_dump_bio(const CONF *conf, BIO *out); + +#define NCONF_get_number(c,g,n,r) NCONF_get_number_e(c,g,n,r) + +/* Module functions */ + +int CONF_modules_load(const CONF *cnf, const char *appname, + unsigned long flags); +int CONF_modules_load_file_ex(OSSL_LIB_CTX *libctx, const char *filename, + const char *appname, unsigned long flags); +int CONF_modules_load_file(const char *filename, const char *appname, + unsigned long flags); +void CONF_modules_unload(int all); +void CONF_modules_finish(void); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define CONF_modules_free() while(0) continue +#endif +int CONF_module_add(const char *name, conf_init_func *ifunc, + conf_finish_func *ffunc); + +const char *CONF_imodule_get_name(const CONF_IMODULE *md); +const char *CONF_imodule_get_value(const CONF_IMODULE *md); +void *CONF_imodule_get_usr_data(const CONF_IMODULE *md); +void CONF_imodule_set_usr_data(CONF_IMODULE *md, void *usr_data); +CONF_MODULE *CONF_imodule_get_module(const CONF_IMODULE *md); +unsigned long CONF_imodule_get_flags(const CONF_IMODULE *md); +void CONF_imodule_set_flags(CONF_IMODULE *md, unsigned long flags); +void *CONF_module_get_usr_data(CONF_MODULE *pmod); +void CONF_module_set_usr_data(CONF_MODULE *pmod, void *usr_data); + +char *CONF_get1_default_config_file(void); + +int CONF_parse_list(const char *list, int sep, int nospc, + int (*list_cb) (const char *elem, int len, void *usr), + void *arg); + +void OPENSSL_load_builtin_modules(void); + + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/configuration.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/configuration.h new file mode 100644 index 00000000000..9c7c3922e25 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/configuration.h @@ -0,0 +1,158 @@ +/* + * WARNING: do not edit! + * Generated by configdata.pm from Configurations/common0.tmpl, Configurations/unix-Makefile.tmpl + * via Makefile.in + * + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_CONFIGURATION_H +# define OPENSSL_CONFIGURATION_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +# ifdef OPENSSL_ALGORITHM_DEFINES +# error OPENSSL_ALGORITHM_DEFINES no longer supported +# endif + +/* + * OpenSSL was configured with the following options: + */ + +# define OPENSSL_CONFIGURED_API 30200 +# ifndef OPENSSL_RAND_SEED_OS +# define OPENSSL_RAND_SEED_OS +# endif +# ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +# endif +# ifndef OPENSSL_NO_ASAN +# define OPENSSL_NO_ASAN +# endif +# ifndef OPENSSL_NO_ASM +# define OPENSSL_NO_ASM +# endif +# ifndef OPENSSL_NO_BROTLI +# define OPENSSL_NO_BROTLI +# endif +# ifndef OPENSSL_NO_BROTLI_DYNAMIC +# define OPENSSL_NO_BROTLI_DYNAMIC +# endif +# ifndef OPENSSL_NO_CRYPTO_MDEBUG +# define OPENSSL_NO_CRYPTO_MDEBUG +# endif +# ifndef OPENSSL_NO_CRYPTO_MDEBUG_BACKTRACE +# define OPENSSL_NO_CRYPTO_MDEBUG_BACKTRACE +# endif +# ifndef OPENSSL_NO_DEVCRYPTOENG +# define OPENSSL_NO_DEVCRYPTOENG +# endif +# ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +# endif +# ifndef OPENSSL_NO_EGD +# define OPENSSL_NO_EGD +# endif +# ifndef OPENSSL_NO_EXTERNAL_TESTS +# define OPENSSL_NO_EXTERNAL_TESTS +# endif +# ifndef OPENSSL_NO_FUZZ_AFL +# define OPENSSL_NO_FUZZ_AFL +# endif +# ifndef OPENSSL_NO_FUZZ_LIBFUZZER +# define OPENSSL_NO_FUZZ_LIBFUZZER +# endif +# ifndef OPENSSL_NO_KTLS +# define OPENSSL_NO_KTLS +# endif +# ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +# endif +# ifndef OPENSSL_NO_MSAN +# define OPENSSL_NO_MSAN +# endif +# ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +# endif +# ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +# endif +# ifndef OPENSSL_NO_SSL3 +# define OPENSSL_NO_SSL3 +# endif +# ifndef OPENSSL_NO_SSL3_METHOD +# define OPENSSL_NO_SSL3_METHOD +# endif +# ifndef OPENSSL_NO_TFO +# define OPENSSL_NO_TFO +# endif +# ifndef OPENSSL_NO_TRACE +# define OPENSSL_NO_TRACE +# endif +# ifndef OPENSSL_NO_UBSAN +# define OPENSSL_NO_UBSAN +# endif +# ifndef OPENSSL_NO_UNIT_TEST +# define OPENSSL_NO_UNIT_TEST +# endif +# ifndef OPENSSL_NO_UPLINK +# define OPENSSL_NO_UPLINK +# endif +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS +# define OPENSSL_NO_WEAK_SSL_CIPHERS +# endif +# ifndef OPENSSL_NO_WINSTORE +# define OPENSSL_NO_WINSTORE +# endif +# ifndef OPENSSL_NO_ZLIB +# define OPENSSL_NO_ZLIB +# endif +# ifndef OPENSSL_NO_ZLIB_DYNAMIC +# define OPENSSL_NO_ZLIB_DYNAMIC +# endif +# ifndef OPENSSL_NO_ZSTD +# define OPENSSL_NO_ZSTD +# endif +# ifndef OPENSSL_NO_ZSTD_DYNAMIC +# define OPENSSL_NO_ZSTD_DYNAMIC +# endif +# ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +# endif + + +/* Generate 80386 code? */ +# undef I386_ONLY + +/* + * The following are cipher-specific, but are part of the public API. 
+ */ +# if !defined(OPENSSL_SYS_UEFI) +# undef BN_LLONG +/* Only one for the following should be defined */ +# define SIXTY_FOUR_BIT_LONG +# undef SIXTY_FOUR_BIT +# undef THIRTY_TWO_BIT +# endif + +# define RC4_INT unsigned char + +# if defined(OPENSSL_NO_COMP) || (defined(OPENSSL_NO_BROTLI) && defined(OPENSSL_NO_ZSTD) && defined(OPENSSL_NO_ZLIB)) +# define OPENSSL_NO_COMP_ALG +# else +# undef OPENSSL_NO_COMP_ALG +# endif + +# ifdef __cplusplus +} +# endif + +#endif /* OPENSSL_CONFIGURATION_H */ diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/core_names.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/core_names.h new file mode 100644 index 00000000000..ffffe90f16f --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/core_names.h @@ -0,0 +1,475 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/core_names.h.in + * + * Copyright 2019-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +#ifndef OPENSSL_CORE_NAMES_H +# define OPENSSL_CORE_NAMES_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* OSSL_CIPHER_PARAM_CTS_MODE Values */ +# define OSSL_CIPHER_CTS_MODE_CS1 "CS1" +# define OSSL_CIPHER_CTS_MODE_CS2 "CS2" +# define OSSL_CIPHER_CTS_MODE_CS3 "CS3" + +/* Known CIPHER names (not a complete list) */ +# define OSSL_CIPHER_NAME_AES_128_GCM_SIV "AES-128-GCM-SIV" +# define OSSL_CIPHER_NAME_AES_192_GCM_SIV "AES-192-GCM-SIV" +# define OSSL_CIPHER_NAME_AES_256_GCM_SIV "AES-256-GCM-SIV" + +/* Known DIGEST names (not a complete list) */ +# define OSSL_DIGEST_NAME_MD5 "MD5" +# define OSSL_DIGEST_NAME_MD5_SHA1 "MD5-SHA1" +# define OSSL_DIGEST_NAME_SHA1 "SHA1" +# define OSSL_DIGEST_NAME_SHA2_224 "SHA2-224" +# define OSSL_DIGEST_NAME_SHA2_256 "SHA2-256" +# define OSSL_DIGEST_NAME_SHA2_256_192 "SHA2-256/192" +# define OSSL_DIGEST_NAME_SHA2_384 "SHA2-384" +# define OSSL_DIGEST_NAME_SHA2_512 "SHA2-512" +# define OSSL_DIGEST_NAME_SHA2_512_224 "SHA2-512/224" +# define OSSL_DIGEST_NAME_SHA2_512_256 "SHA2-512/256" +# define OSSL_DIGEST_NAME_MD2 "MD2" +# define OSSL_DIGEST_NAME_MD4 "MD4" +# define OSSL_DIGEST_NAME_MDC2 "MDC2" +# define OSSL_DIGEST_NAME_RIPEMD160 "RIPEMD160" +# define OSSL_DIGEST_NAME_SHA3_224 "SHA3-224" +# define OSSL_DIGEST_NAME_SHA3_256 "SHA3-256" +# define OSSL_DIGEST_NAME_SHA3_384 "SHA3-384" +# define OSSL_DIGEST_NAME_SHA3_512 "SHA3-512" +# define OSSL_DIGEST_NAME_KECCAK_KMAC128 "KECCAK-KMAC-128" +# define OSSL_DIGEST_NAME_KECCAK_KMAC256 "KECCAK-KMAC-256" +# define OSSL_DIGEST_NAME_SM3 "SM3" + +/* Known MAC names */ +# define OSSL_MAC_NAME_BLAKE2BMAC "BLAKE2BMAC" +# define OSSL_MAC_NAME_BLAKE2SMAC "BLAKE2SMAC" +# define OSSL_MAC_NAME_CMAC "CMAC" +# define OSSL_MAC_NAME_GMAC "GMAC" +# define OSSL_MAC_NAME_HMAC "HMAC" +# define OSSL_MAC_NAME_KMAC128 "KMAC128" +# define OSSL_MAC_NAME_KMAC256 "KMAC256" +# define OSSL_MAC_NAME_POLY1305 "POLY1305" +# define OSSL_MAC_NAME_SIPHASH "SIPHASH" + +/* Known KDF names */ +# define OSSL_KDF_NAME_HKDF "HKDF" +# define OSSL_KDF_NAME_TLS1_3_KDF "TLS13-KDF" +# define OSSL_KDF_NAME_PBKDF1 "PBKDF1" +# define OSSL_KDF_NAME_PBKDF2 "PBKDF2" +# define OSSL_KDF_NAME_SCRYPT "SCRYPT" +# define OSSL_KDF_NAME_SSHKDF "SSHKDF" +# define OSSL_KDF_NAME_SSKDF "SSKDF" +# define OSSL_KDF_NAME_TLS1_PRF "TLS1-PRF" 
+# define OSSL_KDF_NAME_X942KDF_ASN1 "X942KDF-ASN1" +# define OSSL_KDF_NAME_X942KDF_CONCAT "X942KDF-CONCAT" +# define OSSL_KDF_NAME_X963KDF "X963KDF" +# define OSSL_KDF_NAME_KBKDF "KBKDF" +# define OSSL_KDF_NAME_KRB5KDF "KRB5KDF" +# define OSSL_KDF_NAME_HMACDRBGKDF "HMAC-DRBG-KDF" + +/* RSA padding modes */ +# define OSSL_PKEY_RSA_PAD_MODE_NONE "none" +# define OSSL_PKEY_RSA_PAD_MODE_PKCSV15 "pkcs1" +# define OSSL_PKEY_RSA_PAD_MODE_OAEP "oaep" +# define OSSL_PKEY_RSA_PAD_MODE_X931 "x931" +# define OSSL_PKEY_RSA_PAD_MODE_PSS "pss" + +/* RSA pss padding salt length */ +# define OSSL_PKEY_RSA_PSS_SALT_LEN_DIGEST "digest" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_MAX "max" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_AUTO "auto" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_AUTO_DIGEST_MAX "auto-digestmax" + +/* OSSL_PKEY_PARAM_EC_ENCODING values */ +# define OSSL_PKEY_EC_ENCODING_EXPLICIT "explicit" +# define OSSL_PKEY_EC_ENCODING_GROUP "named_curve" + +# define OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_UNCOMPRESSED "uncompressed" +# define OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_COMPRESSED "compressed" +# define OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_HYBRID "hybrid" + +# define OSSL_PKEY_EC_GROUP_CHECK_DEFAULT "default" +# define OSSL_PKEY_EC_GROUP_CHECK_NAMED "named" +# define OSSL_PKEY_EC_GROUP_CHECK_NAMED_NIST "named-nist" + +/* OSSL_KEM_PARAM_OPERATION values */ +#define OSSL_KEM_PARAM_OPERATION_RSASVE "RSASVE" +#define OSSL_KEM_PARAM_OPERATION_DHKEM "DHKEM" + +/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */ +# define OSSL_ALG_PARAM_CIPHER "cipher" +# define OSSL_ALG_PARAM_DIGEST "digest" +# define OSSL_ALG_PARAM_ENGINE "engine" +# define OSSL_ALG_PARAM_MAC "mac" +# define OSSL_ALG_PARAM_PROPERTIES "properties" +# define OSSL_ASYM_CIPHER_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_ENGINE OSSL_PKEY_PARAM_ENGINE +# define OSSL_ASYM_CIPHER_PARAM_IMPLICIT_REJECTION "implicit-rejection" +# define OSSL_ASYM_CIPHER_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_MGF1_DIGEST_PROPS OSSL_PKEY_PARAM_MGF1_PROPERTIES +# define OSSL_ASYM_CIPHER_PARAM_OAEP_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS "digest-props" +# define OSSL_ASYM_CIPHER_PARAM_OAEP_LABEL "oaep-label" +# define OSSL_ASYM_CIPHER_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE +# define OSSL_ASYM_CIPHER_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION "tls-client-version" +# define OSSL_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION "tls-negotiated-version" +# define OSSL_CAPABILITY_TLS_GROUP_ALG "tls-group-alg" +# define OSSL_CAPABILITY_TLS_GROUP_ID "tls-group-id" +# define OSSL_CAPABILITY_TLS_GROUP_IS_KEM "tls-group-is-kem" +# define OSSL_CAPABILITY_TLS_GROUP_MAX_DTLS "tls-max-dtls" +# define OSSL_CAPABILITY_TLS_GROUP_MAX_TLS "tls-max-tls" +# define OSSL_CAPABILITY_TLS_GROUP_MIN_DTLS "tls-min-dtls" +# define OSSL_CAPABILITY_TLS_GROUP_MIN_TLS "tls-min-tls" +# define OSSL_CAPABILITY_TLS_GROUP_NAME "tls-group-name" +# define OSSL_CAPABILITY_TLS_GROUP_NAME_INTERNAL "tls-group-name-internal" +# define OSSL_CAPABILITY_TLS_GROUP_SECURITY_BITS "tls-group-sec-bits" +# define OSSL_CAPABILITY_TLS_SIGALG_CODE_POINT "tls-sigalg-code-point" +# define OSSL_CAPABILITY_TLS_SIGALG_HASH_NAME "tls-sigalg-hash-name" +# define OSSL_CAPABILITY_TLS_SIGALG_HASH_OID "tls-sigalg-hash-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_IANA_NAME "tls-sigalg-iana-name" +# define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE "tls-sigalg-keytype" +# define 
OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE_OID "tls-sigalg-keytype-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_MAX_TLS "tls-max-tls" +# define OSSL_CAPABILITY_TLS_SIGALG_MIN_TLS "tls-min-tls" +# define OSSL_CAPABILITY_TLS_SIGALG_NAME "tls-sigalg-name" +# define OSSL_CAPABILITY_TLS_SIGALG_OID "tls-sigalg-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_SECURITY_BITS "tls-sigalg-sec-bits" +# define OSSL_CAPABILITY_TLS_SIGALG_SIG_NAME "tls-sigalg-sig-name" +# define OSSL_CAPABILITY_TLS_SIGALG_SIG_OID "tls-sigalg-sig-oid" +# define OSSL_CIPHER_PARAM_AEAD "aead" +# define OSSL_CIPHER_PARAM_AEAD_IVLEN OSSL_CIPHER_PARAM_IVLEN +# define OSSL_CIPHER_PARAM_AEAD_MAC_KEY "mackey" +# define OSSL_CIPHER_PARAM_AEAD_TAG "tag" +# define OSSL_CIPHER_PARAM_AEAD_TAGLEN "taglen" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_AAD "tlsaad" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_AAD_PAD "tlsaadpad" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN "tlsivgen" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_IV_FIXED "tlsivfixed" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV "tlsivinv" +# define OSSL_CIPHER_PARAM_ALGORITHM_ID_PARAMS "alg_id_param" +# define OSSL_CIPHER_PARAM_BLOCK_SIZE "blocksize" +# define OSSL_CIPHER_PARAM_CTS "cts" +# define OSSL_CIPHER_PARAM_CTS_MODE "cts_mode" +# define OSSL_CIPHER_PARAM_CUSTOM_IV "custom-iv" +# define OSSL_CIPHER_PARAM_HAS_RAND_KEY "has-randkey" +# define OSSL_CIPHER_PARAM_IV "iv" +# define OSSL_CIPHER_PARAM_IVLEN "ivlen" +# define OSSL_CIPHER_PARAM_KEYLEN "keylen" +# define OSSL_CIPHER_PARAM_MODE "mode" +# define OSSL_CIPHER_PARAM_NUM "num" +# define OSSL_CIPHER_PARAM_PADDING "padding" +# define OSSL_CIPHER_PARAM_RANDOM_KEY "randkey" +# define OSSL_CIPHER_PARAM_RC2_KEYBITS "keybits" +# define OSSL_CIPHER_PARAM_ROUNDS "rounds" +# define OSSL_CIPHER_PARAM_SPEED "speed" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK "tls-multi" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD "tls1multi_aad" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN "tls1multi_aadpacklen" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC "tls1multi_enc" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN "tls1multi_encin" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN "tls1multi_enclen" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE "tls1multi_interleave" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE "tls1multi_maxbufsz" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT "tls1multi_maxsndfrag" +# define OSSL_CIPHER_PARAM_TLS_MAC "tls-mac" +# define OSSL_CIPHER_PARAM_TLS_MAC_SIZE "tls-mac-size" +# define OSSL_CIPHER_PARAM_TLS_VERSION "tls-version" +# define OSSL_CIPHER_PARAM_UPDATED_IV "updated-iv" +# define OSSL_CIPHER_PARAM_USE_BITS "use-bits" +# define OSSL_CIPHER_PARAM_XTS_STANDARD "xts_standard" +# define OSSL_DECODER_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_DIGEST_PARAM_ALGID_ABSENT "algid-absent" +# define OSSL_DIGEST_PARAM_BLOCK_SIZE "blocksize" +# define OSSL_DIGEST_PARAM_MICALG "micalg" +# define OSSL_DIGEST_PARAM_PAD_TYPE "pad-type" +# define OSSL_DIGEST_PARAM_SIZE "size" +# define OSSL_DIGEST_PARAM_SSL3_MS "ssl3-ms" +# define OSSL_DIGEST_PARAM_XOF "xof" +# define OSSL_DIGEST_PARAM_XOFLEN "xoflen" +# define OSSL_DRBG_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_DRBG_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_DRBG_PARAM_ENTROPY_REQUIRED "entropy_required" +# define OSSL_DRBG_PARAM_MAC OSSL_ALG_PARAM_MAC +# define OSSL_DRBG_PARAM_MAX_ADINLEN "max_adinlen" +# define OSSL_DRBG_PARAM_MAX_ENTROPYLEN "max_entropylen" +# define OSSL_DRBG_PARAM_MAX_LENGTH "maxium_length" +# 
define OSSL_DRBG_PARAM_MAX_NONCELEN "max_noncelen" +# define OSSL_DRBG_PARAM_MAX_PERSLEN "max_perslen" +# define OSSL_DRBG_PARAM_MIN_ENTROPYLEN "min_entropylen" +# define OSSL_DRBG_PARAM_MIN_LENGTH "minium_length" +# define OSSL_DRBG_PARAM_MIN_NONCELEN "min_noncelen" +# define OSSL_DRBG_PARAM_PREDICTION_RESISTANCE "prediction_resistance" +# define OSSL_DRBG_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_DRBG_PARAM_RANDOM_DATA "random_data" +# define OSSL_DRBG_PARAM_RESEED_COUNTER "reseed_counter" +# define OSSL_DRBG_PARAM_RESEED_REQUESTS "reseed_requests" +# define OSSL_DRBG_PARAM_RESEED_TIME "reseed_time" +# define OSSL_DRBG_PARAM_RESEED_TIME_INTERVAL "reseed_time_interval" +# define OSSL_DRBG_PARAM_SIZE "size" +# define OSSL_DRBG_PARAM_USE_DF "use_derivation_function" +# define OSSL_ENCODER_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_ENCODER_PARAM_ENCRYPT_LEVEL "encrypt-level" +# define OSSL_ENCODER_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_ENCODER_PARAM_SAVE_PARAMETERS "save-parameters" +# define OSSL_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE "ecdh-cofactor-mode" +# define OSSL_EXCHANGE_PARAM_KDF_DIGEST "kdf-digest" +# define OSSL_EXCHANGE_PARAM_KDF_DIGEST_PROPS "kdf-digest-props" +# define OSSL_EXCHANGE_PARAM_KDF_OUTLEN "kdf-outlen" +# define OSSL_EXCHANGE_PARAM_KDF_TYPE "kdf-type" +# define OSSL_EXCHANGE_PARAM_KDF_UKM "kdf-ukm" +# define OSSL_EXCHANGE_PARAM_PAD "pad" +# define OSSL_GEN_PARAM_ITERATION "iteration" +# define OSSL_GEN_PARAM_POTENTIAL "potential" +# define OSSL_KDF_PARAM_ARGON2_AD "ad" +# define OSSL_KDF_PARAM_ARGON2_LANES "lanes" +# define OSSL_KDF_PARAM_ARGON2_MEMCOST "memcost" +# define OSSL_KDF_PARAM_ARGON2_VERSION "version" +# define OSSL_KDF_PARAM_CEK_ALG "cekalg" +# define OSSL_KDF_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_KDF_PARAM_CONSTANT "constant" +# define OSSL_KDF_PARAM_DATA "data" +# define OSSL_KDF_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_KDF_PARAM_EARLY_CLEAN "early_clean" +# define OSSL_KDF_PARAM_HMACDRBG_ENTROPY "entropy" +# define OSSL_KDF_PARAM_HMACDRBG_NONCE "nonce" +# define OSSL_KDF_PARAM_INFO "info" +# define OSSL_KDF_PARAM_ITER "iter" +# define OSSL_KDF_PARAM_KBKDF_R "r" +# define OSSL_KDF_PARAM_KBKDF_USE_L "use-l" +# define OSSL_KDF_PARAM_KBKDF_USE_SEPARATOR "use-separator" +# define OSSL_KDF_PARAM_KEY "key" +# define OSSL_KDF_PARAM_LABEL "label" +# define OSSL_KDF_PARAM_MAC OSSL_ALG_PARAM_MAC +# define OSSL_KDF_PARAM_MAC_SIZE "maclen" +# define OSSL_KDF_PARAM_MODE "mode" +# define OSSL_KDF_PARAM_PASSWORD "pass" +# define OSSL_KDF_PARAM_PKCS12_ID "id" +# define OSSL_KDF_PARAM_PKCS5 "pkcs5" +# define OSSL_KDF_PARAM_PREFIX "prefix" +# define OSSL_KDF_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_KDF_PARAM_SALT "salt" +# define OSSL_KDF_PARAM_SCRYPT_MAXMEM "maxmem_bytes" +# define OSSL_KDF_PARAM_SCRYPT_N "n" +# define OSSL_KDF_PARAM_SCRYPT_P "p" +# define OSSL_KDF_PARAM_SCRYPT_R "r" +# define OSSL_KDF_PARAM_SECRET "secret" +# define OSSL_KDF_PARAM_SEED "seed" +# define OSSL_KDF_PARAM_SIZE "size" +# define OSSL_KDF_PARAM_SSHKDF_SESSION_ID "session_id" +# define OSSL_KDF_PARAM_SSHKDF_TYPE "type" +# define OSSL_KDF_PARAM_SSHKDF_XCGHASH "xcghash" +# define OSSL_KDF_PARAM_THREADS "threads" +# define OSSL_KDF_PARAM_UKM "ukm" +# define OSSL_KDF_PARAM_X942_ACVPINFO "acvp-info" +# define OSSL_KDF_PARAM_X942_PARTYUINFO "partyu-info" +# define OSSL_KDF_PARAM_X942_PARTYVINFO "partyv-info" +# define OSSL_KDF_PARAM_X942_SUPP_PRIVINFO "supp-privinfo" +# define OSSL_KDF_PARAM_X942_SUPP_PUBINFO "supp-pubinfo" 
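The OSSL_KDF_PARAM_* strings above are the keys used to build OSSL_PARAM arrays for EVP_KDF_derive(). A hedged sketch of a PBKDF2 derivation using those keys (illustrative only, not part of the vendored header; the iteration count and digest choice are arbitrary for the example):

#include <openssl/core_names.h>
#include <openssl/kdf.h>
#include <openssl/params.h>
#include <string.h>

static int pbkdf2_sha256(const char *pass, const unsigned char *salt,
                         size_t saltlen, unsigned char *out, size_t outlen)
{
    unsigned int iter = 600000;     /* illustrative iteration count */
    char digest[] = "SHA2-256";     /* value for OSSL_KDF_PARAM_DIGEST */
    EVP_KDF *kdf = EVP_KDF_fetch(NULL, OSSL_KDF_NAME_PBKDF2, NULL);
    EVP_KDF_CTX *kctx = kdf != NULL ? EVP_KDF_CTX_new(kdf) : NULL;
    OSSL_PARAM params[] = {
        OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_PASSWORD,
                                          (void *)pass, strlen(pass)),
        OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_SALT,
                                          (void *)salt, saltlen),
        OSSL_PARAM_construct_uint(OSSL_KDF_PARAM_ITER, &iter),
        OSSL_PARAM_construct_utf8_string(OSSL_KDF_PARAM_DIGEST, digest, 0),
        OSSL_PARAM_construct_end()
    };
    int ok = kctx != NULL && EVP_KDF_derive(kctx, out, outlen, params) > 0;

    EVP_KDF_CTX_free(kctx);
    EVP_KDF_free(kdf);
    return ok;
}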
+# define OSSL_KDF_PARAM_X942_USE_KEYBITS "use-keybits" +# define OSSL_KEM_PARAM_IKME "ikme" +# define OSSL_KEM_PARAM_OPERATION "operation" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING "block_padding" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA "max_early_data" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN "max_frag_len" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MODE "mode" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_OPTIONS "options" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD "read_ahead" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC "stream_mac" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_TLSTREE "tlstree" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_USE_ETM "use_etm" +# define OSSL_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN "read_buffer_len" +# define OSSL_MAC_PARAM_BLOCK_SIZE "block-size" +# define OSSL_MAC_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_MAC_PARAM_CUSTOM "custom" +# define OSSL_MAC_PARAM_C_ROUNDS "c-rounds" +# define OSSL_MAC_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_MAC_PARAM_DIGEST_NOINIT "digest-noinit" +# define OSSL_MAC_PARAM_DIGEST_ONESHOT "digest-oneshot" +# define OSSL_MAC_PARAM_D_ROUNDS "d-rounds" +# define OSSL_MAC_PARAM_IV "iv" +# define OSSL_MAC_PARAM_KEY "key" +# define OSSL_MAC_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_MAC_PARAM_SALT "salt" +# define OSSL_MAC_PARAM_SIZE "size" +# define OSSL_MAC_PARAM_TLS_DATA_SIZE "tls-data-size" +# define OSSL_MAC_PARAM_XOF "xof" +# define OSSL_OBJECT_PARAM_DATA "data" +# define OSSL_OBJECT_PARAM_DATA_STRUCTURE "data-structure" +# define OSSL_OBJECT_PARAM_DATA_TYPE "data-type" +# define OSSL_OBJECT_PARAM_DESC "desc" +# define OSSL_OBJECT_PARAM_REFERENCE "reference" +# define OSSL_OBJECT_PARAM_TYPE "type" +# define OSSL_PASSPHRASE_PARAM_INFO "info" +# define OSSL_PKEY_PARAM_BITS "bits" +# define OSSL_PKEY_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_PKEY_PARAM_DEFAULT_DIGEST "default-digest" +# define OSSL_PKEY_PARAM_DHKEM_IKM "dhkem-ikm" +# define OSSL_PKEY_PARAM_DH_GENERATOR "safeprime-generator" +# define OSSL_PKEY_PARAM_DH_PRIV_LEN "priv_len" +# define OSSL_PKEY_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_PKEY_PARAM_DIGEST_SIZE "digest-size" +# define OSSL_PKEY_PARAM_DIST_ID "distid" +# define OSSL_PKEY_PARAM_EC_A "a" +# define OSSL_PKEY_PARAM_EC_B "b" +# define OSSL_PKEY_PARAM_EC_CHAR2_M "m" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K1 "k1" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K2 "k2" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K3 "k3" +# define OSSL_PKEY_PARAM_EC_CHAR2_TP_BASIS "tp" +# define OSSL_PKEY_PARAM_EC_CHAR2_TYPE "basis-type" +# define OSSL_PKEY_PARAM_EC_COFACTOR "cofactor" +# define OSSL_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS "decoded-from-explicit" +# define OSSL_PKEY_PARAM_EC_ENCODING "encoding" +# define OSSL_PKEY_PARAM_EC_FIELD_TYPE "field-type" +# define OSSL_PKEY_PARAM_EC_GENERATOR "generator" +# define OSSL_PKEY_PARAM_EC_GROUP_CHECK_TYPE "group-check" +# define OSSL_PKEY_PARAM_EC_INCLUDE_PUBLIC "include-public" +# define OSSL_PKEY_PARAM_EC_ORDER "order" +# define OSSL_PKEY_PARAM_EC_P "p" +# define OSSL_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT "point-format" +# define OSSL_PKEY_PARAM_EC_PUB_X "qx" +# define OSSL_PKEY_PARAM_EC_PUB_Y "qy" +# define OSSL_PKEY_PARAM_EC_SEED "seed" +# define OSSL_PKEY_PARAM_ENCODED_PUBLIC_KEY "encoded-pub-key" +# define OSSL_PKEY_PARAM_ENGINE OSSL_ALG_PARAM_ENGINE +# define OSSL_PKEY_PARAM_FFC_COFACTOR "j" +# define OSSL_PKEY_PARAM_FFC_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_PKEY_PARAM_FFC_DIGEST_PROPS 
OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_FFC_G "g" +# define OSSL_PKEY_PARAM_FFC_GINDEX "gindex" +# define OSSL_PKEY_PARAM_FFC_H "hindex" +# define OSSL_PKEY_PARAM_FFC_P "p" +# define OSSL_PKEY_PARAM_FFC_PBITS "pbits" +# define OSSL_PKEY_PARAM_FFC_PCOUNTER "pcounter" +# define OSSL_PKEY_PARAM_FFC_Q "q" +# define OSSL_PKEY_PARAM_FFC_QBITS "qbits" +# define OSSL_PKEY_PARAM_FFC_SEED "seed" +# define OSSL_PKEY_PARAM_FFC_TYPE "type" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_G "validate-g" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_LEGACY "validate-legacy" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_PQ "validate-pq" +# define OSSL_PKEY_PARAM_GROUP_NAME "group" +# define OSSL_PKEY_PARAM_IMPLICIT_REJECTION "implicit-rejection" +# define OSSL_PKEY_PARAM_MANDATORY_DIGEST "mandatory-digest" +# define OSSL_PKEY_PARAM_MASKGENFUNC "mgf" +# define OSSL_PKEY_PARAM_MAX_SIZE "max-size" +# define OSSL_PKEY_PARAM_MGF1_DIGEST "mgf1-digest" +# define OSSL_PKEY_PARAM_MGF1_PROPERTIES "mgf1-properties" +# define OSSL_PKEY_PARAM_PAD_MODE "pad-mode" +# define OSSL_PKEY_PARAM_PRIV_KEY "priv" +# define OSSL_PKEY_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_PUB_KEY "pub" +# define OSSL_PKEY_PARAM_RSA_BITS OSSL_PKEY_PARAM_BITS +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT "rsa-coefficient" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT1 "rsa-coefficient1" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT2 "rsa-coefficient2" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT3 "rsa-coefficient3" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT4 "rsa-coefficient4" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT5 "rsa-coefficient5" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT6 "rsa-coefficient6" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT7 "rsa-coefficient7" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT8 "rsa-coefficient8" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT9 "rsa-coefficient9" +# define OSSL_PKEY_PARAM_RSA_D "d" +# define OSSL_PKEY_PARAM_RSA_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_PKEY_PARAM_RSA_DIGEST_PROPS OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_RSA_E "e" +# define OSSL_PKEY_PARAM_RSA_EXPONENT "rsa-exponent" +# define OSSL_PKEY_PARAM_RSA_EXPONENT1 "rsa-exponent1" +# define OSSL_PKEY_PARAM_RSA_EXPONENT10 "rsa-exponent10" +# define OSSL_PKEY_PARAM_RSA_EXPONENT2 "rsa-exponent2" +# define OSSL_PKEY_PARAM_RSA_EXPONENT3 "rsa-exponent3" +# define OSSL_PKEY_PARAM_RSA_EXPONENT4 "rsa-exponent4" +# define OSSL_PKEY_PARAM_RSA_EXPONENT5 "rsa-exponent5" +# define OSSL_PKEY_PARAM_RSA_EXPONENT6 "rsa-exponent6" +# define OSSL_PKEY_PARAM_RSA_EXPONENT7 "rsa-exponent7" +# define OSSL_PKEY_PARAM_RSA_EXPONENT8 "rsa-exponent8" +# define OSSL_PKEY_PARAM_RSA_EXPONENT9 "rsa-exponent9" +# define OSSL_PKEY_PARAM_RSA_FACTOR "rsa-factor" +# define OSSL_PKEY_PARAM_RSA_FACTOR1 "rsa-factor1" +# define OSSL_PKEY_PARAM_RSA_FACTOR10 "rsa-factor10" +# define OSSL_PKEY_PARAM_RSA_FACTOR2 "rsa-factor2" +# define OSSL_PKEY_PARAM_RSA_FACTOR3 "rsa-factor3" +# define OSSL_PKEY_PARAM_RSA_FACTOR4 "rsa-factor4" +# define OSSL_PKEY_PARAM_RSA_FACTOR5 "rsa-factor5" +# define OSSL_PKEY_PARAM_RSA_FACTOR6 "rsa-factor6" +# define OSSL_PKEY_PARAM_RSA_FACTOR7 "rsa-factor7" +# define OSSL_PKEY_PARAM_RSA_FACTOR8 "rsa-factor8" +# define OSSL_PKEY_PARAM_RSA_FACTOR9 "rsa-factor9" +# define OSSL_PKEY_PARAM_RSA_MASKGENFUNC OSSL_PKEY_PARAM_MASKGENFUNC +# define OSSL_PKEY_PARAM_RSA_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_PKEY_PARAM_RSA_N "n" +# define OSSL_PKEY_PARAM_RSA_PRIMES "primes" +# define OSSL_PKEY_PARAM_RSA_PSS_SALTLEN "saltlen" +# define 
OSSL_PKEY_PARAM_RSA_TEST_P1 "p1" +# define OSSL_PKEY_PARAM_RSA_TEST_P2 "p2" +# define OSSL_PKEY_PARAM_RSA_TEST_Q1 "q1" +# define OSSL_PKEY_PARAM_RSA_TEST_Q2 "q2" +# define OSSL_PKEY_PARAM_RSA_TEST_XP "xp" +# define OSSL_PKEY_PARAM_RSA_TEST_XP1 "xp1" +# define OSSL_PKEY_PARAM_RSA_TEST_XP2 "xp2" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ "xq" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ1 "xq1" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ2 "xq2" +# define OSSL_PKEY_PARAM_SECURITY_BITS "security-bits" +# define OSSL_PKEY_PARAM_USE_COFACTOR_ECDH OSSL_PKEY_PARAM_USE_COFACTOR_FLAG +# define OSSL_PKEY_PARAM_USE_COFACTOR_FLAG "use-cofactor-flag" +# define OSSL_PROV_PARAM_BUILDINFO "buildinfo" +# define OSSL_PROV_PARAM_CORE_MODULE_FILENAME "module-filename" +# define OSSL_PROV_PARAM_CORE_PROV_NAME "provider-name" +# define OSSL_PROV_PARAM_CORE_VERSION "openssl-version" +# define OSSL_PROV_PARAM_DRBG_TRUNC_DIGEST "drbg-no-trunc-md" +# define OSSL_PROV_PARAM_NAME "name" +# define OSSL_PROV_PARAM_SECURITY_CHECKS "security-checks" +# define OSSL_PROV_PARAM_SELF_TEST_DESC "st-desc" +# define OSSL_PROV_PARAM_SELF_TEST_PHASE "st-phase" +# define OSSL_PROV_PARAM_SELF_TEST_TYPE "st-type" +# define OSSL_PROV_PARAM_STATUS "status" +# define OSSL_PROV_PARAM_TLS1_PRF_EMS_CHECK "tls1-prf-ems-check" +# define OSSL_PROV_PARAM_VERSION "version" +# define OSSL_RAND_PARAM_GENERATE "generate" +# define OSSL_RAND_PARAM_MAX_REQUEST "max_request" +# define OSSL_RAND_PARAM_STATE "state" +# define OSSL_RAND_PARAM_STRENGTH "strength" +# define OSSL_RAND_PARAM_TEST_ENTROPY "test_entropy" +# define OSSL_RAND_PARAM_TEST_NONCE "test_nonce" +# define OSSL_SIGNATURE_PARAM_ALGORITHM_ID "algorithm-id" +# define OSSL_SIGNATURE_PARAM_CONTEXT_STRING "context-string" +# define OSSL_SIGNATURE_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_SIGNATURE_PARAM_DIGEST_SIZE OSSL_PKEY_PARAM_DIGEST_SIZE +# define OSSL_SIGNATURE_PARAM_INSTANCE "instance" +# define OSSL_SIGNATURE_PARAM_KAT "kat" +# define OSSL_SIGNATURE_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_SIGNATURE_PARAM_MGF1_PROPERTIES OSSL_PKEY_PARAM_MGF1_PROPERTIES +# define OSSL_SIGNATURE_PARAM_NONCE_TYPE "nonce-type" +# define OSSL_SIGNATURE_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE +# define OSSL_SIGNATURE_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_SIGNATURE_PARAM_PSS_SALTLEN "saltlen" +# define OSSL_STORE_PARAM_ALIAS "alias" +# define OSSL_STORE_PARAM_DIGEST "digest" +# define OSSL_STORE_PARAM_EXPECT "expect" +# define OSSL_STORE_PARAM_FINGERPRINT "fingerprint" +# define OSSL_STORE_PARAM_INPUT_TYPE "input-type" +# define OSSL_STORE_PARAM_ISSUER "name" +# define OSSL_STORE_PARAM_PROPERTIES "properties" +# define OSSL_STORE_PARAM_SERIAL "serial" +# define OSSL_STORE_PARAM_SUBJECT "subject" + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/crmf.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/crmf.h new file mode 100644 index 00000000000..1f901f35f89 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/crmf.h @@ -0,0 +1,229 @@ +/*- + * WARNING: do not edit! + * Generated by Makefile from include/openssl/crmf.h.in + * + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright Nokia 2007-2019 + * Copyright Siemens AG 2015-2019 + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * CRMF (RFC 4211) implementation by M. Peylo, M. Viljanen, and D. von Oheimb. + */ + + + +#ifndef OPENSSL_CRMF_H +# define OPENSSL_CRMF_H + +# include + +# ifndef OPENSSL_NO_CRMF +# include +# include +# include +# include /* for GENERAL_NAME etc. */ + +/* explicit #includes not strictly needed since implied by the above: */ +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# define OSSL_CRMF_POPOPRIVKEY_THISMESSAGE 0 +# define OSSL_CRMF_POPOPRIVKEY_SUBSEQUENTMESSAGE 1 +# define OSSL_CRMF_POPOPRIVKEY_DHMAC 2 +# define OSSL_CRMF_POPOPRIVKEY_AGREEMAC 3 +# define OSSL_CRMF_POPOPRIVKEY_ENCRYPTEDKEY 4 + +# define OSSL_CRMF_SUBSEQUENTMESSAGE_ENCRCERT 0 +# define OSSL_CRMF_SUBSEQUENTMESSAGE_CHALLENGERESP 1 +typedef struct ossl_crmf_encryptedvalue_st OSSL_CRMF_ENCRYPTEDVALUE; + +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDVALUE) +typedef struct ossl_crmf_msg_st OSSL_CRMF_MSG; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSG) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_MSG) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CRMF_MSG, OSSL_CRMF_MSG, OSSL_CRMF_MSG) +#define sk_OSSL_CRMF_MSG_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_value(sk, idx) ((OSSL_CRMF_MSG *)OPENSSL_sk_value(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk), (idx))) +#define sk_OSSL_CRMF_MSG_new(cmp) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new(ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp))) +#define sk_OSSL_CRMF_MSG_new_null() ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CRMF_MSG_new_reserve(cmp, n) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp), (n))) +#define sk_OSSL_CRMF_MSG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (n)) +#define sk_OSSL_CRMF_MSG_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_delete(sk, i) ((OSSL_CRMF_MSG *)OPENSSL_sk_delete(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (i))) +#define sk_OSSL_CRMF_MSG_delete_ptr(sk, ptr) ((OSSL_CRMF_MSG *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr))) +#define sk_OSSL_CRMF_MSG_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_pop(sk) ((OSSL_CRMF_MSG *)OPENSSL_sk_pop(ossl_check_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_shift(sk) ((OSSL_CRMF_MSG *)OPENSSL_sk_shift(ossl_check_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CRMF_MSG_sk_type(sk),ossl_check_OSSL_CRMF_MSG_freefunc_type(freefunc)) +#define sk_OSSL_CRMF_MSG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr), (idx)) +#define sk_OSSL_CRMF_MSG_set(sk, idx, ptr) ((OSSL_CRMF_MSG *)OPENSSL_sk_set(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (idx), ossl_check_OSSL_CRMF_MSG_type(ptr))) +#define sk_OSSL_CRMF_MSG_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define 
sk_OSSL_CRMF_MSG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr), pnum) +#define sk_OSSL_CRMF_MSG_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_dup(sk) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_copyfunc_type(copyfunc), ossl_check_OSSL_CRMF_MSG_freefunc_type(freefunc))) +#define sk_OSSL_CRMF_MSG_set_cmp_func(sk, cmp) ((sk_OSSL_CRMF_MSG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp))) + +typedef struct ossl_crmf_attributetypeandvalue_st OSSL_CRMF_ATTRIBUTETYPEANDVALUE; +typedef struct ossl_crmf_pbmparameter_st OSSL_CRMF_PBMPARAMETER; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_PBMPARAMETER) +typedef struct ossl_crmf_poposigningkey_st OSSL_CRMF_POPOSIGNINGKEY; +typedef struct ossl_crmf_certrequest_st OSSL_CRMF_CERTREQUEST; +typedef struct ossl_crmf_certid_st OSSL_CRMF_CERTID; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_CERTID) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_CERTID) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CRMF_CERTID, OSSL_CRMF_CERTID, OSSL_CRMF_CERTID) +#define sk_OSSL_CRMF_CERTID_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_value(sk, idx) ((OSSL_CRMF_CERTID *)OPENSSL_sk_value(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk), (idx))) +#define sk_OSSL_CRMF_CERTID_new(cmp) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new(ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp))) +#define sk_OSSL_CRMF_CERTID_new_null() ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CRMF_CERTID_new_reserve(cmp, n) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp), (n))) +#define sk_OSSL_CRMF_CERTID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (n)) +#define sk_OSSL_CRMF_CERTID_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_delete(sk, i) ((OSSL_CRMF_CERTID *)OPENSSL_sk_delete(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (i))) +#define sk_OSSL_CRMF_CERTID_delete_ptr(sk, ptr) ((OSSL_CRMF_CERTID *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr))) +#define sk_OSSL_CRMF_CERTID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_pop(sk) ((OSSL_CRMF_CERTID *)OPENSSL_sk_pop(ossl_check_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_shift(sk) ((OSSL_CRMF_CERTID *)OPENSSL_sk_shift(ossl_check_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CRMF_CERTID_sk_type(sk),ossl_check_OSSL_CRMF_CERTID_freefunc_type(freefunc)) +#define sk_OSSL_CRMF_CERTID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr), (idx)) +#define sk_OSSL_CRMF_CERTID_set(sk, idx, 
ptr) ((OSSL_CRMF_CERTID *)OPENSSL_sk_set(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (idx), ossl_check_OSSL_CRMF_CERTID_type(ptr))) +#define sk_OSSL_CRMF_CERTID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr), pnum) +#define sk_OSSL_CRMF_CERTID_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_dup(sk) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_copyfunc_type(copyfunc), ossl_check_OSSL_CRMF_CERTID_freefunc_type(freefunc))) +#define sk_OSSL_CRMF_CERTID_set_cmp_func(sk, cmp) ((sk_OSSL_CRMF_CERTID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp))) + + +typedef struct ossl_crmf_pkipublicationinfo_st OSSL_CRMF_PKIPUBLICATIONINFO; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_PKIPUBLICATIONINFO) +typedef struct ossl_crmf_singlepubinfo_st OSSL_CRMF_SINGLEPUBINFO; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_SINGLEPUBINFO) +typedef struct ossl_crmf_certtemplate_st OSSL_CRMF_CERTTEMPLATE; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_CERTTEMPLATE) +typedef STACK_OF(OSSL_CRMF_MSG) OSSL_CRMF_MSGS; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSGS) + +typedef struct ossl_crmf_optionalvalidity_st OSSL_CRMF_OPTIONALVALIDITY; + +/* crmf_pbm.c */ +OSSL_CRMF_PBMPARAMETER *OSSL_CRMF_pbmp_new(OSSL_LIB_CTX *libctx, size_t slen, + int owfnid, size_t itercnt, + int macnid); +int OSSL_CRMF_pbm_new(OSSL_LIB_CTX *libctx, const char *propq, + const OSSL_CRMF_PBMPARAMETER *pbmp, + const unsigned char *msg, size_t msglen, + const unsigned char *sec, size_t seclen, + unsigned char **mac, size_t *maclen); + +/* crmf_lib.c */ +int OSSL_CRMF_MSG_set1_regCtrl_regToken(OSSL_CRMF_MSG *msg, + const ASN1_UTF8STRING *tok); +ASN1_UTF8STRING +*OSSL_CRMF_MSG_get0_regCtrl_regToken(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regCtrl_authenticator(OSSL_CRMF_MSG *msg, + const ASN1_UTF8STRING *auth); +ASN1_UTF8STRING +*OSSL_CRMF_MSG_get0_regCtrl_authenticator(const OSSL_CRMF_MSG *msg); +int +OSSL_CRMF_MSG_PKIPublicationInfo_push0_SinglePubInfo(OSSL_CRMF_PKIPUBLICATIONINFO *pi, + OSSL_CRMF_SINGLEPUBINFO *spi); +# define OSSL_CRMF_PUB_METHOD_DONTCARE 0 +# define OSSL_CRMF_PUB_METHOD_X500 1 +# define OSSL_CRMF_PUB_METHOD_WEB 2 +# define OSSL_CRMF_PUB_METHOD_LDAP 3 +int OSSL_CRMF_MSG_set0_SinglePubInfo(OSSL_CRMF_SINGLEPUBINFO *spi, + int method, GENERAL_NAME *nm); +# define OSSL_CRMF_PUB_ACTION_DONTPUBLISH 0 +# define OSSL_CRMF_PUB_ACTION_PLEASEPUBLISH 1 +int OSSL_CRMF_MSG_set_PKIPublicationInfo_action(OSSL_CRMF_PKIPUBLICATIONINFO *pi, + int action); +int OSSL_CRMF_MSG_set1_regCtrl_pkiPublicationInfo(OSSL_CRMF_MSG *msg, + const OSSL_CRMF_PKIPUBLICATIONINFO *pi); +OSSL_CRMF_PKIPUBLICATIONINFO +*OSSL_CRMF_MSG_get0_regCtrl_pkiPublicationInfo(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regCtrl_protocolEncrKey(OSSL_CRMF_MSG *msg, + const X509_PUBKEY *pubkey); 
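The OSSL_CRMF_pbmp_new()/OSSL_CRMF_pbm_new() pair declared above implements the password-based MAC (PBM) of RFC 4211/RFC 4210. A sketch of deriving such a MAC, with the salt length, iteration count, and NID choices picked purely for illustration; OSSL_CRMF_PBMPARAMETER_free() comes from the DECLARE_ASN1_FUNCTIONS expansion earlier in this header:

#include <openssl/crmf.h>
#include <openssl/objects.h>   /* NID_sha256, NID_hmac_sha1 */

/* Compute a PBM over `msg` keyed by `secret`; on success the caller
 * owns *mac (allocated by OSSL_CRMF_pbm_new) and must OPENSSL_free it. */
static int crmf_pbm_demo(const unsigned char *msg, size_t msglen,
                         const unsigned char *secret, size_t seclen,
                         unsigned char **mac, size_t *maclen)
{
    OSSL_CRMF_PBMPARAMETER *pbmp =
        OSSL_CRMF_pbmp_new(NULL, 16 /* salt len */, NID_sha256,
                           10000 /* iterations */, NID_hmac_sha1);
    int ok = pbmp != NULL
          && OSSL_CRMF_pbm_new(NULL, NULL, pbmp, msg, msglen,
                               secret, seclen, mac, maclen);

    OSSL_CRMF_PBMPARAMETER_free(pbmp);
    return ok;
}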
+X509_PUBKEY
+*OSSL_CRMF_MSG_get0_regCtrl_protocolEncrKey(const OSSL_CRMF_MSG *msg);
+int OSSL_CRMF_MSG_set1_regCtrl_oldCertID(OSSL_CRMF_MSG *msg,
+                                         const OSSL_CRMF_CERTID *cid);
+OSSL_CRMF_CERTID
+*OSSL_CRMF_MSG_get0_regCtrl_oldCertID(const OSSL_CRMF_MSG *msg);
+OSSL_CRMF_CERTID *OSSL_CRMF_CERTID_gen(const X509_NAME *issuer,
+                                       const ASN1_INTEGER *serial);
+
+int OSSL_CRMF_MSG_set1_regInfo_utf8Pairs(OSSL_CRMF_MSG *msg,
+                                         const ASN1_UTF8STRING *utf8pairs);
+ASN1_UTF8STRING
+*OSSL_CRMF_MSG_get0_regInfo_utf8Pairs(const OSSL_CRMF_MSG *msg);
+int OSSL_CRMF_MSG_set1_regInfo_certReq(OSSL_CRMF_MSG *msg,
+                                       const OSSL_CRMF_CERTREQUEST *cr);
+OSSL_CRMF_CERTREQUEST
+*OSSL_CRMF_MSG_get0_regInfo_certReq(const OSSL_CRMF_MSG *msg);
+
+int OSSL_CRMF_MSG_set0_validity(OSSL_CRMF_MSG *crm,
+                                ASN1_TIME *notBefore, ASN1_TIME *notAfter);
+int OSSL_CRMF_MSG_set_certReqId(OSSL_CRMF_MSG *crm, int rid);
+int OSSL_CRMF_MSG_get_certReqId(const OSSL_CRMF_MSG *crm);
+int OSSL_CRMF_MSG_set0_extensions(OSSL_CRMF_MSG *crm, X509_EXTENSIONS *exts);
+
+int OSSL_CRMF_MSG_push0_extension(OSSL_CRMF_MSG *crm, X509_EXTENSION *ext);
+# define OSSL_CRMF_POPO_NONE -1
+# define OSSL_CRMF_POPO_RAVERIFIED 0
+# define OSSL_CRMF_POPO_SIGNATURE 1
+# define OSSL_CRMF_POPO_KEYENC 2
+# define OSSL_CRMF_POPO_KEYAGREE 3
+int OSSL_CRMF_MSG_create_popo(int meth, OSSL_CRMF_MSG *crm,
+                              EVP_PKEY *pkey, const EVP_MD *digest,
+                              OSSL_LIB_CTX *libctx, const char *propq);
+int OSSL_CRMF_MSGS_verify_popo(const OSSL_CRMF_MSGS *reqs,
+                               int rid, int acceptRAVerified,
+                               OSSL_LIB_CTX *libctx, const char *propq);
+OSSL_CRMF_CERTTEMPLATE *OSSL_CRMF_MSG_get0_tmpl(const OSSL_CRMF_MSG *crm);
+X509_PUBKEY
+*OSSL_CRMF_CERTTEMPLATE_get0_publicKey(const OSSL_CRMF_CERTTEMPLATE *tmpl);
+const X509_NAME
+*OSSL_CRMF_CERTTEMPLATE_get0_subject(const OSSL_CRMF_CERTTEMPLATE *tmpl);
+const X509_NAME
+*OSSL_CRMF_CERTTEMPLATE_get0_issuer(const OSSL_CRMF_CERTTEMPLATE *tmpl);
+const ASN1_INTEGER
+*OSSL_CRMF_CERTTEMPLATE_get0_serialNumber(const OSSL_CRMF_CERTTEMPLATE *tmpl);
+X509_EXTENSIONS
+*OSSL_CRMF_CERTTEMPLATE_get0_extensions(const OSSL_CRMF_CERTTEMPLATE *tmpl);
+const X509_NAME
+*OSSL_CRMF_CERTID_get0_issuer(const OSSL_CRMF_CERTID *cid);
+const ASN1_INTEGER
+*OSSL_CRMF_CERTID_get0_serialNumber(const OSSL_CRMF_CERTID *cid);
+int OSSL_CRMF_CERTTEMPLATE_fill(OSSL_CRMF_CERTTEMPLATE *tmpl,
+                                EVP_PKEY *pubkey,
+                                const X509_NAME *subject,
+                                const X509_NAME *issuer,
+                                const ASN1_INTEGER *serial);
+X509
+*OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert,
+                                       OSSL_LIB_CTX *libctx, const char *propq,
+                                       EVP_PKEY *pkey);
+
+# ifdef __cplusplus
+}
+# endif
+# endif /* !defined(OPENSSL_NO_CRMF) */
+#endif /* !defined(OPENSSL_CRMF_H) */
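A short sketch of the oldCertID regCtrl helpers declared above, as a key-update request might use them (illustrative, not part of the vendored header):

    #include <openssl/crmf.h>
    #include <openssl/x509.h>

    /* Attach an oldCertID regCtrl naming |oldcert| to request |msg|. */
    static int add_old_cert_id(OSSL_CRMF_MSG *msg, const X509 *oldcert)
    {
        OSSL_CRMF_CERTID *cid =
            OSSL_CRMF_CERTID_gen(X509_get_issuer_name(oldcert),
                                 X509_get0_serialNumber(oldcert));
        int ok = cid != NULL && OSSL_CRMF_MSG_set1_regCtrl_oldCertID(msg, cid);

        OSSL_CRMF_CERTID_free(cid);   /* set1 took its own copy */
        return ok;
    }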
diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/crypto.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/crypto.h
new file mode 100644
index 00000000000..55e00dccdc1
--- /dev/null
+++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/crypto.h
@@ -0,0 +1,561 @@
+/*
+ * WARNING: do not edit!
+ * Generated by Makefile from include/openssl/crypto.h.in
+ *
+ * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+
+
+#ifndef OPENSSL_CRYPTO_H
+# define OPENSSL_CRYPTO_H
+# pragma once
+
+# include
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+#  define HEADER_CRYPTO_H
+# endif
+
+# include
+# include
+
+# include
+
+# ifndef OPENSSL_NO_STDIO
+#  include
+# endif
+
+# include
+# include
+# include
+# include
+# include
+# include
+
+# ifdef CHARSET_EBCDIC
+#  include
+# endif
+
+/*
+ * Resolve problems on some operating systems with symbol names that clash
+ * one way or another
+ */
+# include
+
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+#  include
+# endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+#  define SSLeay                  OpenSSL_version_num
+#  define SSLeay_version          OpenSSL_version
+#  define SSLEAY_VERSION_NUMBER   OPENSSL_VERSION_NUMBER
+#  define SSLEAY_VERSION          OPENSSL_VERSION
+#  define SSLEAY_CFLAGS           OPENSSL_CFLAGS
+#  define SSLEAY_BUILT_ON         OPENSSL_BUILT_ON
+#  define SSLEAY_PLATFORM         OPENSSL_PLATFORM
+#  define SSLEAY_DIR              OPENSSL_DIR
+
+/*
+ * Old type for allocating dynamic locks. No longer used. Use the new thread
+ * API instead.
+ */
+typedef struct {
+    int dummy;
+} CRYPTO_dynlock;
+
+# endif /* OPENSSL_NO_DEPRECATED_1_1_0 */
+
+typedef void CRYPTO_RWLOCK;
+
+CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void);
+__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock);
+__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock);
+int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock);
+void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock);
+
+int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock);
+int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
+                     CRYPTO_RWLOCK *lock);
+int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock);
+int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock);
+
+/* No longer needed, so this is a no-op */
+#define OPENSSL_malloc_init() while(0) continue
+
+# define OPENSSL_malloc(num) \
+        CRYPTO_malloc(num, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_zalloc(num) \
+        CRYPTO_zalloc(num, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_realloc(addr, num) \
+        CRYPTO_realloc(addr, num, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_clear_realloc(addr, old_num, num) \
+        CRYPTO_clear_realloc(addr, old_num, num, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_clear_free(addr, num) \
+        CRYPTO_clear_free(addr, num, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_free(addr) \
+        CRYPTO_free(addr, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_memdup(str, s) \
+        CRYPTO_memdup((str), s, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_strdup(str) \
+        CRYPTO_strdup(str, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_strndup(str, n) \
+        CRYPTO_strndup(str, n, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_secure_malloc(num) \
+        CRYPTO_secure_malloc(num, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_secure_zalloc(num) \
+        CRYPTO_secure_zalloc(num, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_secure_free(addr) \
+        CRYPTO_secure_free(addr, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_secure_clear_free(addr, num) \
+        CRYPTO_secure_clear_free(addr, num, OPENSSL_FILE, OPENSSL_LINE)
+# define OPENSSL_secure_actual_size(ptr) \
+        CRYPTO_secure_actual_size(ptr)
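The lock and atomic declarations above combine as in this small self-contained sketch (illustrative, not part of the vendored header); CRYPTO_atomic_add() uses native atomics where available and otherwise falls back to the lock:

    #include <openssl/crypto.h>

    /* Create a lock, bump a counter through it once, and clean up. */
    static int atomic_counter_demo(void)
    {
        CRYPTO_RWLOCK *lock = CRYPTO_THREAD_lock_new();
        int counter = 0, now = 0;
        int ok = lock != NULL && CRYPTO_atomic_add(&counter, 1, &now, lock);

        CRYPTO_THREAD_lock_free(lock);
        return ok && now == 1;
    }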
+
+size_t OPENSSL_strlcpy(char *dst, const char *src, size_t siz);
+size_t OPENSSL_strlcat(char *dst, const char *src, size_t siz);
+size_t OPENSSL_strnlen(const char *str, size_t maxlen);
+int OPENSSL_buf2hexstr_ex(char *str, size_t str_n, size_t *strlength,
+                          const unsigned char *buf, size_t buflen,
+                          const char sep);
+char *OPENSSL_buf2hexstr(const unsigned char *buf, long buflen);
+int OPENSSL_hexstr2buf_ex(unsigned char *buf, size_t buf_n, size_t *buflen,
+                          const char *str, const char sep);
+unsigned char *OPENSSL_hexstr2buf(const char *str, long *buflen);
+int OPENSSL_hexchar2int(unsigned char c);
+int OPENSSL_strcasecmp(const char *s1, const char *s2);
+int OPENSSL_strncasecmp(const char *s1, const char *s2, size_t n);
+
+# define OPENSSL_MALLOC_MAX_NELEMS(type)  (((1U<<(sizeof(int)*8-1))-1)/sizeof(type))
+
+/*
+ * These functions return the values of OPENSSL_VERSION_MAJOR,
+ * OPENSSL_VERSION_MINOR, OPENSSL_VERSION_PATCH, OPENSSL_VERSION_PRE_RELEASE
+ * and OPENSSL_VERSION_BUILD_METADATA, respectively.
+ */
+unsigned int OPENSSL_version_major(void);
+unsigned int OPENSSL_version_minor(void);
+unsigned int OPENSSL_version_patch(void);
+const char *OPENSSL_version_pre_release(void);
+const char *OPENSSL_version_build_metadata(void);
+
+unsigned long OpenSSL_version_num(void);
+const char *OpenSSL_version(int type);
+# define OPENSSL_VERSION                0
+# define OPENSSL_CFLAGS                 1
+# define OPENSSL_BUILT_ON               2
+# define OPENSSL_PLATFORM               3
+# define OPENSSL_DIR                    4
+# define OPENSSL_ENGINES_DIR            5
+# define OPENSSL_VERSION_STRING         6
+# define OPENSSL_FULL_VERSION_STRING    7
+# define OPENSSL_MODULES_DIR            8
+# define OPENSSL_CPU_INFO               9
+
+const char *OPENSSL_info(int type);
+/*
+ * The series starts at 1001 to avoid confusion with the OpenSSL_version
+ * types.
+ */
+# define OPENSSL_INFO_CONFIG_DIR                1001
+# define OPENSSL_INFO_ENGINES_DIR               1002
+# define OPENSSL_INFO_MODULES_DIR               1003
+# define OPENSSL_INFO_DSO_EXTENSION             1004
+# define OPENSSL_INFO_DIR_FILENAME_SEPARATOR    1005
+# define OPENSSL_INFO_LIST_SEPARATOR            1006
+# define OPENSSL_INFO_SEED_SOURCE               1007
+# define OPENSSL_INFO_CPU_SETTINGS              1008
+
+int OPENSSL_issetugid(void);
+
+struct crypto_ex_data_st {
+    OSSL_LIB_CTX *ctx;
+    STACK_OF(void) *sk;
+};
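A quick round trip through the hex helpers declared above; both results are OPENSSL_malloc()ed, hence the OPENSSL_free() calls (illustrative sketch):

    #include <string.h>
    #include <openssl/crypto.h>

    /* Hex-encode a buffer and decode it back, checking they agree. */
    static int hex_round_trip(const unsigned char *buf, long buflen)
    {
        char *hex = OPENSSL_buf2hexstr(buf, buflen);   /* e.g. "DE:AD:BE:EF" */
        long declen = 0;
        unsigned char *dec =
            hex == NULL ? NULL : OPENSSL_hexstr2buf(hex, &declen);
        int ok = dec != NULL && declen == buflen
                 && memcmp(dec, buf, buflen) == 0;

        OPENSSL_free(dec);
        OPENSSL_free(hex);
        return ok;
    }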
+
+SKM_DEFINE_STACK_OF_INTERNAL(void, void, void)
+#define sk_void_num(sk) OPENSSL_sk_num(ossl_check_const_void_sk_type(sk))
+#define sk_void_value(sk, idx) ((void *)OPENSSL_sk_value(ossl_check_const_void_sk_type(sk), (idx)))
+#define sk_void_new(cmp) ((STACK_OF(void) *)OPENSSL_sk_new(ossl_check_void_compfunc_type(cmp)))
+#define sk_void_new_null() ((STACK_OF(void) *)OPENSSL_sk_new_null())
+#define sk_void_new_reserve(cmp, n) ((STACK_OF(void) *)OPENSSL_sk_new_reserve(ossl_check_void_compfunc_type(cmp), (n)))
+#define sk_void_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_void_sk_type(sk), (n))
+#define sk_void_free(sk) OPENSSL_sk_free(ossl_check_void_sk_type(sk))
+#define sk_void_zero(sk) OPENSSL_sk_zero(ossl_check_void_sk_type(sk))
+#define sk_void_delete(sk, i) ((void *)OPENSSL_sk_delete(ossl_check_void_sk_type(sk), (i)))
+#define sk_void_delete_ptr(sk, ptr) ((void *)OPENSSL_sk_delete_ptr(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)))
+#define sk_void_push(sk, ptr) OPENSSL_sk_push(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr))
+#define sk_void_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr))
+#define sk_void_pop(sk) ((void *)OPENSSL_sk_pop(ossl_check_void_sk_type(sk)))
+#define sk_void_shift(sk) ((void *)OPENSSL_sk_shift(ossl_check_void_sk_type(sk)))
+#define sk_void_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_void_sk_type(sk),ossl_check_void_freefunc_type(freefunc))
+#define sk_void_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr), (idx))
+#define sk_void_set(sk, idx, ptr) ((void *)OPENSSL_sk_set(ossl_check_void_sk_type(sk), (idx), ossl_check_void_type(ptr)))
+#define sk_void_find(sk, ptr) OPENSSL_sk_find(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr))
+#define sk_void_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr))
+#define sk_void_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr), pnum)
+#define sk_void_sort(sk) OPENSSL_sk_sort(ossl_check_void_sk_type(sk))
+#define sk_void_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_void_sk_type(sk))
+#define sk_void_dup(sk) ((STACK_OF(void) *)OPENSSL_sk_dup(ossl_check_const_void_sk_type(sk)))
+#define sk_void_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(void) *)OPENSSL_sk_deep_copy(ossl_check_const_void_sk_type(sk), ossl_check_void_copyfunc_type(copyfunc), ossl_check_void_freefunc_type(freefunc)))
+#define sk_void_set_cmp_func(sk, cmp) ((sk_void_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_void_sk_type(sk), ossl_check_void_compfunc_type(cmp)))
+
+
+
+/*
+ * Per class, we have a STACK of function pointers.
+ */
+# define CRYPTO_EX_INDEX_SSL              0
+# define CRYPTO_EX_INDEX_SSL_CTX          1
+# define CRYPTO_EX_INDEX_SSL_SESSION      2
+# define CRYPTO_EX_INDEX_X509             3
+# define CRYPTO_EX_INDEX_X509_STORE       4
+# define CRYPTO_EX_INDEX_X509_STORE_CTX   5
+# define CRYPTO_EX_INDEX_DH               6
+# define CRYPTO_EX_INDEX_DSA              7
+# define CRYPTO_EX_INDEX_EC_KEY           8
+# define CRYPTO_EX_INDEX_RSA              9
+# define CRYPTO_EX_INDEX_ENGINE          10
+# define CRYPTO_EX_INDEX_UI              11
+# define CRYPTO_EX_INDEX_BIO             12
+# define CRYPTO_EX_INDEX_APP             13
+# define CRYPTO_EX_INDEX_UI_METHOD       14
+# define CRYPTO_EX_INDEX_RAND_DRBG       15
+# define CRYPTO_EX_INDEX_DRBG            CRYPTO_EX_INDEX_RAND_DRBG
+# define CRYPTO_EX_INDEX_OSSL_LIB_CTX    16
+# define CRYPTO_EX_INDEX_EVP_PKEY        17
+# define CRYPTO_EX_INDEX__COUNT          18
+
+typedef void CRYPTO_EX_new (void *parent, void *ptr, CRYPTO_EX_DATA *ad,
+                            int idx, long argl, void *argp);
+typedef void CRYPTO_EX_free (void *parent, void *ptr, CRYPTO_EX_DATA *ad,
+                             int idx, long argl, void *argp);
+typedef int CRYPTO_EX_dup (CRYPTO_EX_DATA *to, const CRYPTO_EX_DATA *from,
+                           void **from_d, int idx, long argl, void *argp);
+__owur int CRYPTO_get_ex_new_index(int class_index, long argl, void *argp,
+                                   CRYPTO_EX_new *new_func,
+                                   CRYPTO_EX_dup *dup_func,
+                                   CRYPTO_EX_free *free_func);
+/* No longer use an index. */
+int CRYPTO_free_ex_index(int class_index, int idx);
+
+/*
+ * Initialise/duplicate/free CRYPTO_EX_DATA variables corresponding to a
+ * given class (invokes whatever per-class callbacks are applicable)
+ */
+int CRYPTO_new_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad);
+int CRYPTO_dup_ex_data(int class_index, CRYPTO_EX_DATA *to,
+                       const CRYPTO_EX_DATA *from);
+
+void CRYPTO_free_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad);
+
+/* Allocate a single item in the CRYPTO_EX_DATA variable */
+int CRYPTO_alloc_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad,
+                         int idx);
+
+/*
+ * Get/set data in a CRYPTO_EX_DATA variable corresponding to a particular
+ * index (relative to the class type involved)
+ */
+int CRYPTO_set_ex_data(CRYPTO_EX_DATA *ad, int idx, void *val);
+void *CRYPTO_get_ex_data(const CRYPTO_EX_DATA *ad, int idx);
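A sketch of the ex_data machinery above; X509_set_ex_data() comes from x509.h, and the NULL callbacks mean no automatic new/dup/free handling (illustrative, not part of the vendored header):

    #include <openssl/crypto.h>
    #include <openssl/x509.h>

    /* Stash an application pointer on an X509 via ex_data.  The index
     * is allocated once per process. */
    static int tag_cert(X509 *cert, void *tag)
    {
        static int idx = -1;

        if (idx == -1)
            idx = CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509, 0, NULL,
                                          NULL, NULL, NULL);
        return idx != -1 && X509_set_ex_data(cert, idx, tag);
    }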
+
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+/*
+ * This function cleans up all "ex_data" state. It mustn't be called under
+ * potential race-conditions.
+ */
+#  define CRYPTO_cleanup_all_ex_data() while(0) continue
+
+/*
+ * The old locking functions have been removed completely without compatibility
+ * macros. This is because the old functions either could not properly report
+ * errors, or the returned error values were not clearly documented.
+ * Replacing the locking functions with no-ops would cause race condition
+ * issues in the affected applications. It is far better for them to fail at
+ * compile time.
+ * On the other hand, the locking callbacks are no longer used.  Consequently,
+ * the callback management functions can be safely replaced with no-op macros.
+ */
+#  define CRYPTO_num_locks()            (1)
+#  define CRYPTO_set_locking_callback(func)
+#  define CRYPTO_get_locking_callback()         (NULL)
+#  define CRYPTO_set_add_lock_callback(func)
+#  define CRYPTO_get_add_lock_callback()        (NULL)
+
+/*
+ * These defines were used in combination with the old locking callbacks,
+ * they are not called anymore, but old code that's not called might still
+ * use them.
+ */
+#  define CRYPTO_LOCK             1
+#  define CRYPTO_UNLOCK           2
+#  define CRYPTO_READ             4
+#  define CRYPTO_WRITE            8
+
+/* This structure is no longer used */
+typedef struct crypto_threadid_st {
+    int dummy;
+} CRYPTO_THREADID;
+/* Only use CRYPTO_THREADID_set_[numeric|pointer]() within callbacks */
+#  define CRYPTO_THREADID_set_numeric(id, val)
+#  define CRYPTO_THREADID_set_pointer(id, ptr)
+#  define CRYPTO_THREADID_set_callback(threadid_func)   (0)
+#  define CRYPTO_THREADID_get_callback()                (NULL)
+#  define CRYPTO_THREADID_current(id)
+#  define CRYPTO_THREADID_cmp(a, b)                     (-1)
+#  define CRYPTO_THREADID_cpy(dest, src)
+#  define CRYPTO_THREADID_hash(id)                      (0UL)
+
+#  ifndef OPENSSL_NO_DEPRECATED_1_0_0
+#   define CRYPTO_set_id_callback(func)
+#   define CRYPTO_get_id_callback()                     (NULL)
+#   define CRYPTO_thread_id()                           (0UL)
+#  endif /* OPENSSL_NO_DEPRECATED_1_0_0 */
+
+#  define CRYPTO_set_dynlock_create_callback(dyn_create_function)
+#  define CRYPTO_set_dynlock_lock_callback(dyn_lock_function)
+#  define CRYPTO_set_dynlock_destroy_callback(dyn_destroy_function)
+#  define CRYPTO_get_dynlock_create_callback()          (NULL)
+#  define CRYPTO_get_dynlock_lock_callback()            (NULL)
+#  define CRYPTO_get_dynlock_destroy_callback()         (NULL)
+# endif /* OPENSSL_NO_DEPRECATED_1_1_0 */
+
+typedef void *(*CRYPTO_malloc_fn)(size_t num, const char *file, int line);
+typedef void *(*CRYPTO_realloc_fn)(void *addr, size_t num, const char *file,
+                                   int line);
+typedef void (*CRYPTO_free_fn)(void *addr, const char *file, int line);
+int CRYPTO_set_mem_functions(CRYPTO_malloc_fn malloc_fn,
+                             CRYPTO_realloc_fn realloc_fn,
+                             CRYPTO_free_fn free_fn);
+void CRYPTO_get_mem_functions(CRYPTO_malloc_fn *malloc_fn,
+                              CRYPTO_realloc_fn *realloc_fn,
+                              CRYPTO_free_fn *free_fn);
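A minimal allocator hook using CRYPTO_set_mem_functions() as declared above; real code would add instrumentation rather than plain malloc/realloc/free, and must install the hooks before any OpenSSL allocation happens (illustrative sketch):

    #include <stdlib.h>
    #include <openssl/crypto.h>

    static void *my_malloc(size_t num, const char *file, int line)
    { (void)file; (void)line; return malloc(num); }
    static void *my_realloc(void *addr, size_t num, const char *file, int line)
    { (void)file; (void)line; return realloc(addr, num); }
    static void my_free(void *addr, const char *file, int line)
    { (void)file; (void)line; free(addr); }

    /* Returns 1 on success; fails once allocations have already occurred. */
    static int install_allocator(void)
    {
        return CRYPTO_set_mem_functions(my_malloc, my_realloc, my_free);
    }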
+
+OSSL_CRYPTO_ALLOC void *CRYPTO_malloc(size_t num, const char *file, int line);
+OSSL_CRYPTO_ALLOC void *CRYPTO_zalloc(size_t num, const char *file, int line);
+OSSL_CRYPTO_ALLOC void *CRYPTO_memdup(const void *str, size_t siz, const char *file, int line);
+OSSL_CRYPTO_ALLOC char *CRYPTO_strdup(const char *str, const char *file, int line);
+OSSL_CRYPTO_ALLOC char *CRYPTO_strndup(const char *str, size_t s, const char *file, int line);
+void CRYPTO_free(void *ptr, const char *file, int line);
+void CRYPTO_clear_free(void *ptr, size_t num, const char *file, int line);
+void *CRYPTO_realloc(void *addr, size_t num, const char *file, int line);
+void *CRYPTO_clear_realloc(void *addr, size_t old_num, size_t num,
+                           const char *file, int line);
+
+int CRYPTO_secure_malloc_init(size_t sz, size_t minsize);
+int CRYPTO_secure_malloc_done(void);
+OSSL_CRYPTO_ALLOC void *CRYPTO_secure_malloc(size_t num, const char *file, int line);
+OSSL_CRYPTO_ALLOC void *CRYPTO_secure_zalloc(size_t num, const char *file, int line);
+void CRYPTO_secure_free(void *ptr, const char *file, int line);
+void CRYPTO_secure_clear_free(void *ptr, size_t num,
+                              const char *file, int line);
+int CRYPTO_secure_allocated(const void *ptr);
+int CRYPTO_secure_malloc_initialized(void);
+size_t CRYPTO_secure_actual_size(void *ptr);
+size_t CRYPTO_secure_used(void);
+
+void OPENSSL_cleanse(void *ptr, size_t len);
+
+# ifndef OPENSSL_NO_CRYPTO_MDEBUG
+/*
+ * The following can be used to detect memory leaks in the library. If
+ * used, it turns on malloc checking
+ */
+#  define CRYPTO_MEM_CHECK_OFF     0x0   /* Control only */
+#  define CRYPTO_MEM_CHECK_ON      0x1   /* Control and mode bit */
+#  define CRYPTO_MEM_CHECK_ENABLE  0x2   /* Control and mode bit */
+#  define CRYPTO_MEM_CHECK_DISABLE 0x3   /* Control only */
+
+void CRYPTO_get_alloc_counts(int *mcount, int *rcount, int *fcount);
+#  ifndef OPENSSL_NO_DEPRECATED_3_0
+#   define OPENSSL_mem_debug_push(info) \
+        CRYPTO_mem_debug_push(info, OPENSSL_FILE, OPENSSL_LINE)
+#   define OPENSSL_mem_debug_pop() \
+        CRYPTO_mem_debug_pop()
+#  endif
+#  ifndef OPENSSL_NO_DEPRECATED_3_0
+OSSL_DEPRECATEDIN_3_0 int CRYPTO_set_mem_debug(int flag);
+OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_ctrl(int mode);
+OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_debug_push(const char *info,
+                                                const char *file, int line);
+OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_debug_pop(void);
+OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_malloc(void *addr, size_t num,
+                                                   int flag,
+                                                   const char *file, int line);
+OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_realloc(void *addr1, void *addr2,
+                                                    size_t num, int flag,
+                                                    const char *file, int line);
+OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_free(void *addr, int flag,
+                                                 const char *file, int line);
+OSSL_DEPRECATEDIN_3_0
+int CRYPTO_mem_leaks_cb(int (*cb)(const char *str, size_t len, void *u),
+                        void *u);
+#  endif
+#  ifndef OPENSSL_NO_STDIO
+#   ifndef OPENSSL_NO_DEPRECATED_3_0
+OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_leaks_fp(FILE *);
+#   endif
+#  endif
+#  ifndef OPENSSL_NO_DEPRECATED_3_0
+OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_leaks(BIO *bio);
+#  endif
+# endif /* OPENSSL_NO_CRYPTO_MDEBUG */
+
+/* die if we have to */
+ossl_noreturn void OPENSSL_die(const char *assertion, const char *file, int line);
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+#  define OpenSSLDie(f,l,a) OPENSSL_die((a),(f),(l))
+# endif
+# define OPENSSL_assert(e) \
+    (void)((e) ? 0 : (OPENSSL_die("assertion failed: " #e, OPENSSL_FILE, OPENSSL_LINE), 1))
+
+int OPENSSL_isservice(void);
+
+void OPENSSL_init(void);
+# ifdef OPENSSL_SYS_UNIX
+#  ifndef OPENSSL_NO_DEPRECATED_3_0
+OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_prepare(void);
+OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_parent(void);
+OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_child(void);
+#  endif
+# endif
+
+struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result);
+int OPENSSL_gmtime_adj(struct tm *tm, int offset_day, long offset_sec);
+int OPENSSL_gmtime_diff(int *pday, int *psec,
+                        const struct tm *from, const struct tm *to);
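The secure-heap functions above in their typical pattern: reserve a locked arena, allocate secrets from it, and wipe on free. The 64 KiB arena and 16-byte minimum allocation are illustrative sizes, not requirements:

    #include <openssl/crypto.h>

    static int with_secure_key(size_t keylen)
    {
        unsigned char *key;
        int ok = 0;

        if (!CRYPTO_secure_malloc_init(64 * 1024, 16))
            return 0;
        if ((key = OPENSSL_secure_zalloc(keylen)) != NULL) {
            /* ... load and use the key ... */
            OPENSSL_secure_clear_free(key, keylen);
            ok = 1;
        }
        CRYPTO_secure_malloc_done();
        return ok;
    }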
+
+/*
+ * CRYPTO_memcmp returns zero iff the |len| bytes at |a| and |b| are equal.
+ * It takes an amount of time dependent on |len|, but independent of the
+ * contents of |a| and |b|. Unlike memcmp, it cannot be used to put elements
+ * into a defined order as the return value when a != b is undefined, other
+ * than to be non-zero.
+ */
+int CRYPTO_memcmp(const void * in_a, const void * in_b, size_t len);
+
+/* Standard initialisation options */
+# define OPENSSL_INIT_NO_LOAD_CRYPTO_STRINGS 0x00000001L
+# define OPENSSL_INIT_LOAD_CRYPTO_STRINGS    0x00000002L
+# define OPENSSL_INIT_ADD_ALL_CIPHERS        0x00000004L
+# define OPENSSL_INIT_ADD_ALL_DIGESTS        0x00000008L
+# define OPENSSL_INIT_NO_ADD_ALL_CIPHERS     0x00000010L
+# define OPENSSL_INIT_NO_ADD_ALL_DIGESTS     0x00000020L
+# define OPENSSL_INIT_LOAD_CONFIG            0x00000040L
+# define OPENSSL_INIT_NO_LOAD_CONFIG         0x00000080L
+# define OPENSSL_INIT_ASYNC                  0x00000100L
+# define OPENSSL_INIT_ENGINE_RDRAND          0x00000200L
+# define OPENSSL_INIT_ENGINE_DYNAMIC         0x00000400L
+# define OPENSSL_INIT_ENGINE_OPENSSL         0x00000800L
+# define OPENSSL_INIT_ENGINE_CRYPTODEV       0x00001000L
+# define OPENSSL_INIT_ENGINE_CAPI            0x00002000L
+# define OPENSSL_INIT_ENGINE_PADLOCK         0x00004000L
+# define OPENSSL_INIT_ENGINE_AFALG           0x00008000L
+/* FREE: 0x00010000L */
+# define OPENSSL_INIT_ATFORK                 0x00020000L
+/* OPENSSL_INIT_BASE_ONLY                    0x00040000L */
+# define OPENSSL_INIT_NO_ATEXIT              0x00080000L
+/* OPENSSL_INIT flag range 0x03f00000 reserved for OPENSSL_init_ssl() */
+/* FREE: 0x04000000L */
+/* FREE: 0x08000000L */
+/* FREE: 0x10000000L */
+/* FREE: 0x20000000L */
+/* FREE: 0x40000000L */
+/* FREE: 0x80000000L */
+/* Max OPENSSL_INIT flag value is 0x80000000 */
+
+/* openssl and dasync not counted as builtin */
+# define OPENSSL_INIT_ENGINE_ALL_BUILTIN \
+    (OPENSSL_INIT_ENGINE_RDRAND | OPENSSL_INIT_ENGINE_DYNAMIC \
+     | OPENSSL_INIT_ENGINE_CRYPTODEV | OPENSSL_INIT_ENGINE_CAPI | \
+     OPENSSL_INIT_ENGINE_PADLOCK)
+
+/* Library initialisation functions */
+void OPENSSL_cleanup(void);
+int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings);
+int OPENSSL_atexit(void (*handler)(void));
+void OPENSSL_thread_stop(void);
+void OPENSSL_thread_stop_ex(OSSL_LIB_CTX *ctx);
+
+/* Low-level control of initialization */
+OPENSSL_INIT_SETTINGS *OPENSSL_INIT_new(void);
+# ifndef OPENSSL_NO_STDIO
+int OPENSSL_INIT_set_config_filename(OPENSSL_INIT_SETTINGS *settings,
+                                     const char *config_filename);
+void OPENSSL_INIT_set_config_file_flags(OPENSSL_INIT_SETTINGS *settings,
+                                        unsigned long flags);
+int OPENSSL_INIT_set_config_appname(OPENSSL_INIT_SETTINGS *settings,
+                                    const char *config_appname);
+# endif
+void OPENSSL_INIT_free(OPENSSL_INIT_SETTINGS *settings);
+
+# if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG)
+#  if defined(_WIN32)
+#   if defined(BASETYPES) || defined(_WINDEF_H)
+/* application has to include in order to use this */
+typedef DWORD CRYPTO_THREAD_LOCAL;
+typedef DWORD CRYPTO_THREAD_ID;
+
+typedef LONG CRYPTO_ONCE;
+#    define CRYPTO_ONCE_STATIC_INIT 0
+#   endif
+#  else
+#   if defined(__TANDEM) && defined(_SPT_MODEL_)
+#    define SPT_THREAD_SIGNAL 1
+#    define SPT_THREAD_AWARE 1
+#    include
+#   else
+#    include
+#   endif
+typedef pthread_once_t CRYPTO_ONCE;
+typedef pthread_key_t CRYPTO_THREAD_LOCAL;
+typedef pthread_t CRYPTO_THREAD_ID;
+
+#   define CRYPTO_ONCE_STATIC_INIT PTHREAD_ONCE_INIT
+#  endif
+# endif
+
+# if !defined(CRYPTO_ONCE_STATIC_INIT)
+typedef unsigned int CRYPTO_ONCE;
+typedef unsigned int CRYPTO_THREAD_LOCAL;
+typedef unsigned int CRYPTO_THREAD_ID;
+#  define CRYPTO_ONCE_STATIC_INIT 0
+# endif
+
+int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void));
+
+int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *));
+void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key);
+int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val);
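One-time lazy initialisation with the portable CRYPTO_ONCE wrapper declared above (illustrative sketch):

    #include <openssl/crypto.h>

    static CRYPTO_ONCE once = CRYPTO_ONCE_STATIC_INIT;
    static CRYPTO_RWLOCK *global_lock;

    static void do_init(void)
    {
        global_lock = CRYPTO_THREAD_lock_new();
    }

    static CRYPTO_RWLOCK *get_global_lock(void)
    {
        if (!CRYPTO_THREAD_run_once(&once, do_init))
            return NULL;
        return global_lock;   /* may still be NULL if allocation failed */
    }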
+int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key);
+
+CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void);
+int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b);
+
+OSSL_LIB_CTX *OSSL_LIB_CTX_new(void);
+OSSL_LIB_CTX *OSSL_LIB_CTX_new_from_dispatch(const OSSL_CORE_HANDLE *handle,
+                                             const OSSL_DISPATCH *in);
+OSSL_LIB_CTX *OSSL_LIB_CTX_new_child(const OSSL_CORE_HANDLE *handle,
+                                     const OSSL_DISPATCH *in);
+int OSSL_LIB_CTX_load_config(OSSL_LIB_CTX *ctx, const char *config_file);
+void OSSL_LIB_CTX_free(OSSL_LIB_CTX *);
+OSSL_LIB_CTX *OSSL_LIB_CTX_get0_global_default(void);
+OSSL_LIB_CTX *OSSL_LIB_CTX_set0_default(OSSL_LIB_CTX *libctx);
+
+void OSSL_sleep(uint64_t millis);
+
+# ifdef __cplusplus
+}
+# endif
+#endif
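A sketch of the OSSL_LIB_CTX lifecycle declared above; "myapp.cnf" is a hypothetical config path, not something this header defines:

    #include <openssl/crypto.h>

    /* Run crypto operations against an isolated library context that is
     * configured from its own file. */
    static OSSL_LIB_CTX *make_app_ctx(void)
    {
        OSSL_LIB_CTX *ctx = OSSL_LIB_CTX_new();

        if (ctx != NULL && !OSSL_LIB_CTX_load_config(ctx, "myapp.cnf")) {
            OSSL_LIB_CTX_free(ctx);
            return NULL;
        }
        /* Pass |ctx| to _ex APIs, or install it via OSSL_LIB_CTX_set0_default(). */
        return ctx;
    }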
diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ct.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ct.h
new file mode 100644
index 00000000000..e6dd1192a4e
--- /dev/null
+++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ct.h
@@ -0,0 +1,573 @@
+/*
+ * WARNING: do not edit!
+ * Generated by Makefile from include/openssl/ct.h.in
+ *
+ * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+
+
+#ifndef OPENSSL_CT_H
+# define OPENSSL_CT_H
+# pragma once
+
+# include
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+#  define HEADER_CT_H
+# endif
+
+# include
+
+# ifndef OPENSSL_NO_CT
+#  include
+#  include
+#  include
+#  include
+#  ifdef __cplusplus
+extern "C" {
+#  endif
+
+
+/* Minimum RSA key size, from RFC6962 */
+# define SCT_MIN_RSA_BITS 2048
+
+/* All hashes are SHA256 in v1 of Certificate Transparency */
+# define CT_V1_HASHLEN SHA256_DIGEST_LENGTH
+
+SKM_DEFINE_STACK_OF_INTERNAL(SCT, SCT, SCT)
+#define sk_SCT_num(sk) OPENSSL_sk_num(ossl_check_const_SCT_sk_type(sk))
+#define sk_SCT_value(sk, idx) ((SCT *)OPENSSL_sk_value(ossl_check_const_SCT_sk_type(sk), (idx)))
+#define sk_SCT_new(cmp) ((STACK_OF(SCT) *)OPENSSL_sk_new(ossl_check_SCT_compfunc_type(cmp)))
+#define sk_SCT_new_null() ((STACK_OF(SCT) *)OPENSSL_sk_new_null())
+#define sk_SCT_new_reserve(cmp, n) ((STACK_OF(SCT) *)OPENSSL_sk_new_reserve(ossl_check_SCT_compfunc_type(cmp), (n)))
+#define sk_SCT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SCT_sk_type(sk), (n))
+#define sk_SCT_free(sk) OPENSSL_sk_free(ossl_check_SCT_sk_type(sk))
+#define sk_SCT_zero(sk) OPENSSL_sk_zero(ossl_check_SCT_sk_type(sk))
+#define sk_SCT_delete(sk, i) ((SCT *)OPENSSL_sk_delete(ossl_check_SCT_sk_type(sk), (i)))
+#define sk_SCT_delete_ptr(sk, ptr) ((SCT *)OPENSSL_sk_delete_ptr(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)))
+#define sk_SCT_push(sk, ptr) OPENSSL_sk_push(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr))
+#define sk_SCT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr))
+#define sk_SCT_pop(sk) ((SCT *)OPENSSL_sk_pop(ossl_check_SCT_sk_type(sk)))
+#define sk_SCT_shift(sk) ((SCT *)OPENSSL_sk_shift(ossl_check_SCT_sk_type(sk)))
+#define sk_SCT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SCT_sk_type(sk),ossl_check_SCT_freefunc_type(freefunc))
+#define sk_SCT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr), (idx))
+#define sk_SCT_set(sk, idx, ptr) ((SCT *)OPENSSL_sk_set(ossl_check_SCT_sk_type(sk), (idx), ossl_check_SCT_type(ptr)))
+#define sk_SCT_find(sk, ptr) OPENSSL_sk_find(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr))
+#define sk_SCT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr))
+#define sk_SCT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr), pnum)
+#define sk_SCT_sort(sk) OPENSSL_sk_sort(ossl_check_SCT_sk_type(sk))
+#define sk_SCT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SCT_sk_type(sk))
+#define sk_SCT_dup(sk) ((STACK_OF(SCT) *)OPENSSL_sk_dup(ossl_check_const_SCT_sk_type(sk)))
+#define sk_SCT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SCT) *)OPENSSL_sk_deep_copy(ossl_check_const_SCT_sk_type(sk), ossl_check_SCT_copyfunc_type(copyfunc), ossl_check_SCT_freefunc_type(freefunc)))
+#define sk_SCT_set_cmp_func(sk, cmp) ((sk_SCT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SCT_sk_type(sk), ossl_check_SCT_compfunc_type(cmp)))
+SKM_DEFINE_STACK_OF_INTERNAL(CTLOG, CTLOG, CTLOG)
+#define sk_CTLOG_num(sk) OPENSSL_sk_num(ossl_check_const_CTLOG_sk_type(sk))
+#define sk_CTLOG_value(sk, idx) ((CTLOG *)OPENSSL_sk_value(ossl_check_const_CTLOG_sk_type(sk), (idx)))
+#define sk_CTLOG_new(cmp) ((STACK_OF(CTLOG) *)OPENSSL_sk_new(ossl_check_CTLOG_compfunc_type(cmp)))
+#define sk_CTLOG_new_null() ((STACK_OF(CTLOG) *)OPENSSL_sk_new_null())
+#define sk_CTLOG_new_reserve(cmp, n) ((STACK_OF(CTLOG) *)OPENSSL_sk_new_reserve(ossl_check_CTLOG_compfunc_type(cmp), (n)))
+#define sk_CTLOG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CTLOG_sk_type(sk), (n))
+#define sk_CTLOG_free(sk) OPENSSL_sk_free(ossl_check_CTLOG_sk_type(sk))
+#define sk_CTLOG_zero(sk) OPENSSL_sk_zero(ossl_check_CTLOG_sk_type(sk))
+#define sk_CTLOG_delete(sk, i) ((CTLOG *)OPENSSL_sk_delete(ossl_check_CTLOG_sk_type(sk), (i)))
+#define sk_CTLOG_delete_ptr(sk, ptr) ((CTLOG *)OPENSSL_sk_delete_ptr(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)))
+#define sk_CTLOG_push(sk, ptr) OPENSSL_sk_push(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr))
+#define sk_CTLOG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr))
+#define sk_CTLOG_pop(sk) ((CTLOG *)OPENSSL_sk_pop(ossl_check_CTLOG_sk_type(sk)))
+#define sk_CTLOG_shift(sk) ((CTLOG *)OPENSSL_sk_shift(ossl_check_CTLOG_sk_type(sk)))
+#define sk_CTLOG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CTLOG_sk_type(sk),ossl_check_CTLOG_freefunc_type(freefunc))
+#define sk_CTLOG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr), (idx))
+#define sk_CTLOG_set(sk, idx, ptr) ((CTLOG *)OPENSSL_sk_set(ossl_check_CTLOG_sk_type(sk), (idx), ossl_check_CTLOG_type(ptr)))
+#define sk_CTLOG_find(sk, ptr) OPENSSL_sk_find(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr))
+#define sk_CTLOG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr))
+#define sk_CTLOG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr), pnum)
+#define sk_CTLOG_sort(sk) OPENSSL_sk_sort(ossl_check_CTLOG_sk_type(sk))
+#define sk_CTLOG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CTLOG_sk_type(sk))
+#define sk_CTLOG_dup(sk) ((STACK_OF(CTLOG) *)OPENSSL_sk_dup(ossl_check_const_CTLOG_sk_type(sk)))
+#define sk_CTLOG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CTLOG) *)OPENSSL_sk_deep_copy(ossl_check_const_CTLOG_sk_type(sk), ossl_check_CTLOG_copyfunc_type(copyfunc), ossl_check_CTLOG_freefunc_type(freefunc)))
+#define sk_CTLOG_set_cmp_func(sk, cmp) ((sk_CTLOG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_compfunc_type(cmp)))
+
+
+
+typedef enum {
+    CT_LOG_ENTRY_TYPE_NOT_SET = -1,
+    CT_LOG_ENTRY_TYPE_X509 = 0,
+    CT_LOG_ENTRY_TYPE_PRECERT = 1
+} ct_log_entry_type_t;
+
+typedef enum {
+    SCT_VERSION_NOT_SET = -1,
+    SCT_VERSION_V1 = 0
+} sct_version_t;
+
+typedef enum {
+    SCT_SOURCE_UNKNOWN,
+    SCT_SOURCE_TLS_EXTENSION,
+    SCT_SOURCE_X509V3_EXTENSION,
+    SCT_SOURCE_OCSP_STAPLED_RESPONSE
+} sct_source_t;
+
+typedef enum {
+    SCT_VALIDATION_STATUS_NOT_SET,
+    SCT_VALIDATION_STATUS_UNKNOWN_LOG,
+    SCT_VALIDATION_STATUS_VALID,
+    SCT_VALIDATION_STATUS_INVALID,
+    SCT_VALIDATION_STATUS_UNVERIFIED,
+    SCT_VALIDATION_STATUS_UNKNOWN_VERSION
+} sct_validation_status_t;
+
+/******************************************
+ * CT policy evaluation context functions *
+ ******************************************/
+
+/*
+ * Creates a new, empty policy evaluation context associated with the given
+ * library context and property query string.
+ * The caller is responsible for calling CT_POLICY_EVAL_CTX_free when finished
+ * with the CT_POLICY_EVAL_CTX.
+ */
+CT_POLICY_EVAL_CTX *CT_POLICY_EVAL_CTX_new_ex(OSSL_LIB_CTX *libctx,
+                                              const char *propq);
+
+/*
+ * The same as CT_POLICY_EVAL_CTX_new_ex() but the default library
+ * context and property query string is used.
+ */
+CT_POLICY_EVAL_CTX *CT_POLICY_EVAL_CTX_new(void);
+
+/* Deletes a policy evaluation context and anything it owns. */
+void CT_POLICY_EVAL_CTX_free(CT_POLICY_EVAL_CTX *ctx);
+
+/* Gets the peer certificate that the SCTs are for */
+X509* CT_POLICY_EVAL_CTX_get0_cert(const CT_POLICY_EVAL_CTX *ctx);
+
+/*
+ * Sets the certificate associated with the received SCTs.
+ * Increments the reference count of cert.
+ * Returns 1 on success, 0 otherwise.
+ */
+int CT_POLICY_EVAL_CTX_set1_cert(CT_POLICY_EVAL_CTX *ctx, X509 *cert);
+
+/* Gets the issuer of the aforementioned certificate */
+X509* CT_POLICY_EVAL_CTX_get0_issuer(const CT_POLICY_EVAL_CTX *ctx);
+
+/*
+ * Sets the issuer of the certificate associated with the received SCTs.
+ * Increments the reference count of issuer.
+ * Returns 1 on success, 0 otherwise.
+ */
+int CT_POLICY_EVAL_CTX_set1_issuer(CT_POLICY_EVAL_CTX *ctx, X509 *issuer);
+
+/* Gets the CT logs that are trusted sources of SCTs */
+const CTLOG_STORE *CT_POLICY_EVAL_CTX_get0_log_store(const CT_POLICY_EVAL_CTX *ctx);
+
+/* Sets the log store that is in use. It must outlive the CT_POLICY_EVAL_CTX. */
+void CT_POLICY_EVAL_CTX_set_shared_CTLOG_STORE(CT_POLICY_EVAL_CTX *ctx,
+                                               CTLOG_STORE *log_store);
+
+/*
+ * Gets the time, in milliseconds since the Unix epoch, that will be used as the
+ * current time when checking whether an SCT was issued in the future.
+ * Such SCTs will fail validation, as required by RFC6962.
+ */
+uint64_t CT_POLICY_EVAL_CTX_get_time(const CT_POLICY_EVAL_CTX *ctx);
+
+/*
+ * Sets the time to evaluate SCTs against, in milliseconds since the Unix epoch.
+ * If an SCT's timestamp is after this time, it will be interpreted as having
+ * been issued in the future. RFC6962 states that "TLS clients MUST reject SCTs
+ * whose timestamp is in the future", so an SCT will not validate in this case.
+ */
+void CT_POLICY_EVAL_CTX_set_time(CT_POLICY_EVAL_CTX *ctx, uint64_t time_in_ms);
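Putting the policy-evaluation setters above together; per the comment on CT_POLICY_EVAL_CTX_set_shared_CTLOG_STORE, the store must outlive the context (illustrative sketch):

    #include <openssl/ct.h>

    /* Build a context for checking SCTs on |cert| issued by |issuer|,
     * trusting the logs in |store| and evaluating at |now_ms|. */
    static CT_POLICY_EVAL_CTX *make_ct_ctx(X509 *cert, X509 *issuer,
                                           CTLOG_STORE *store, uint64_t now_ms)
    {
        CT_POLICY_EVAL_CTX *ctx = CT_POLICY_EVAL_CTX_new();

        if (ctx == NULL
                || !CT_POLICY_EVAL_CTX_set1_cert(ctx, cert)
                || !CT_POLICY_EVAL_CTX_set1_issuer(ctx, issuer)) {
            CT_POLICY_EVAL_CTX_free(ctx);
            return NULL;
        }
        CT_POLICY_EVAL_CTX_set_shared_CTLOG_STORE(ctx, store);
        CT_POLICY_EVAL_CTX_set_time(ctx, now_ms);
        return ctx;
    }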
+
+/*****************
+ * SCT functions *
+ *****************/
+
+/*
+ * Creates a new, blank SCT.
+ * The caller is responsible for calling SCT_free when finished with the SCT.
+ */
+SCT *SCT_new(void);
+
+/*
+ * Creates a new SCT from some base64-encoded strings.
+ * The caller is responsible for calling SCT_free when finished with the SCT.
+ */
+SCT *SCT_new_from_base64(unsigned char version,
+                         const char *logid_base64,
+                         ct_log_entry_type_t entry_type,
+                         uint64_t timestamp,
+                         const char *extensions_base64,
+                         const char *signature_base64);
+
+/*
+ * Frees the SCT and the underlying data structures.
+ */
+void SCT_free(SCT *sct);
+
+/*
+ * Free a stack of SCTs, and the underlying SCTs themselves.
+ * Intended to be compatible with X509V3_EXT_FREE.
+ */
+void SCT_LIST_free(STACK_OF(SCT) *a);
+
+/*
+ * Returns the version of the SCT.
+ */
+sct_version_t SCT_get_version(const SCT *sct);
+
+/*
+ * Set the version of an SCT.
+ * Returns 1 on success, 0 if the version is unrecognized.
+ */
+__owur int SCT_set_version(SCT *sct, sct_version_t version);
+
+/*
+ * Returns the log entry type of the SCT.
+ */
+ct_log_entry_type_t SCT_get_log_entry_type(const SCT *sct);
+
+/*
+ * Set the log entry type of an SCT.
+ * Returns 1 on success, 0 otherwise.
+ */
+__owur int SCT_set_log_entry_type(SCT *sct, ct_log_entry_type_t entry_type);
+
+/*
+ * Gets the ID of the log that an SCT came from.
+ * Ownership of the log ID remains with the SCT.
+ * Returns the length of the log ID.
+ */
+size_t SCT_get0_log_id(const SCT *sct, unsigned char **log_id);
+
+/*
+ * Set the log ID of an SCT to point directly to the *log_id specified.
+ * The SCT takes ownership of the specified pointer.
+ * Returns 1 on success, 0 otherwise.
+ */
+__owur int SCT_set0_log_id(SCT *sct, unsigned char *log_id, size_t log_id_len);
+
+/*
+ * Set the log ID of an SCT.
+ * This makes a copy of the log_id.
+ * Returns 1 on success, 0 otherwise.
+ */
+__owur int SCT_set1_log_id(SCT *sct, const unsigned char *log_id,
+                           size_t log_id_len);
+
+/*
+ * Returns the timestamp for the SCT (epoch time in milliseconds).
+ */
+uint64_t SCT_get_timestamp(const SCT *sct);
+
+/*
+ * Set the timestamp of an SCT (epoch time in milliseconds).
+ */
+void SCT_set_timestamp(SCT *sct, uint64_t timestamp);
+
+/*
+ * Return the NID for the signature used by the SCT.
+ * For CT v1, this will be either NID_sha256WithRSAEncryption or
+ * NID_ecdsa_with_SHA256 (or NID_undef if incorrect/unset).
+ */
+int SCT_get_signature_nid(const SCT *sct);
+
+/*
+ * Set the signature type of an SCT
+ * For CT v1, this should be either NID_sha256WithRSAEncryption or
+ * NID_ecdsa_with_SHA256.
+ * Returns 1 on success, 0 otherwise.
+ */
+__owur int SCT_set_signature_nid(SCT *sct, int nid);
+
+/*
+ * Set *ext to point to the extension data for the SCT. ext must not be NULL.
+ * The SCT retains ownership of this pointer.
+ * Returns length of the data pointed to.
+ */
+size_t SCT_get0_extensions(const SCT *sct, unsigned char **ext);
+
+/*
+ * Set the extensions of an SCT to point directly to the *ext specified.
+ * The SCT takes ownership of the specified pointer.
+ */
+void SCT_set0_extensions(SCT *sct, unsigned char *ext, size_t ext_len);
+
+/*
+ * Set the extensions of an SCT.
+ * This takes a copy of the ext.
+ * Returns 1 on success, 0 otherwise.
+ */
+__owur int SCT_set1_extensions(SCT *sct, const unsigned char *ext,
+                               size_t ext_len);
+
+/*
+ * Set *sig to point to the signature for the SCT. sig must not be NULL.
+ * The SCT retains ownership of this pointer.
+ * Returns length of the data pointed to.
+ */
+size_t SCT_get0_signature(const SCT *sct, unsigned char **sig);
+
+/*
+ * Set the signature of an SCT to point directly to the *sig specified.
+ * The SCT takes ownership of the specified pointer.
+ */
+void SCT_set0_signature(SCT *sct, unsigned char *sig, size_t sig_len);
+
+/*
+ * Set the signature of an SCT to be a copy of the *sig specified.
+ * Returns 1 on success, 0 otherwise.
+ */
+__owur int SCT_set1_signature(SCT *sct, const unsigned char *sig,
+                              size_t sig_len);
+
+/*
+ * The origin of this SCT, e.g. TLS extension, OCSP response, etc.
+ */
+sct_source_t SCT_get_source(const SCT *sct);
+
+/*
+ * Set the origin of this SCT, e.g. TLS extension, OCSP response, etc.
+ * Returns 1 on success, 0 otherwise.
+ */
+__owur int SCT_set_source(SCT *sct, sct_source_t source);
+
+/*
+ * Returns a text string describing the validation status of |sct|.
+ */
+const char *SCT_validation_status_string(const SCT *sct);
+
+/*
+ * Pretty-prints an |sct| to |out|.
+ * It will be indented by the number of spaces specified by |indent|.
+ * If |logs| is not NULL, it will be used to lookup the CT log that the SCT came
+ * from, so that the log name can be printed.
+ */
+void SCT_print(const SCT *sct, BIO *out, int indent, const CTLOG_STORE *logs);
+
+/*
+ * Pretty-prints an |sct_list| to |out|.
+ * It will be indented by the number of spaces specified by |indent|.
+ * SCTs will be delimited by |separator|.
+ * If |logs| is not NULL, it will be used to lookup the CT log that each SCT
+ * came from, so that the log names can be printed.
+ */
+void SCT_LIST_print(const STACK_OF(SCT) *sct_list, BIO *out, int indent,
+                    const char *separator, const CTLOG_STORE *logs);
+
+/*
+ * Gets the last result of validating this SCT.
+ * If it has not been validated yet, returns SCT_VALIDATION_STATUS_NOT_SET.
+ */
+sct_validation_status_t SCT_get_validation_status(const SCT *sct);
+
+/*
+ * Validates the given SCT with the provided context.
+ * Sets the "validation_status" field of the SCT.
+ * Returns 1 if the SCT is valid and the signature verifies.
+ * Returns 0 if the SCT is invalid or could not be verified.
+ * Returns -1 if an error occurs.
+ */
+__owur int SCT_validate(SCT *sct, const CT_POLICY_EVAL_CTX *ctx);
+
+/*
+ * Validates the given list of SCTs with the provided context.
+ * Sets the "validation_status" field of each SCT.
+ * Returns 1 if there are no invalid SCTs and all signatures verify.
+ * Returns 0 if at least one SCT is invalid or could not be verified.
+ * Returns a negative integer if an error occurs.
+ */
+__owur int SCT_LIST_validate(const STACK_OF(SCT) *scts,
+                             CT_POLICY_EVAL_CTX *ctx);
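Using SCT_LIST_validate() and the status helpers above, e.g. with a context built as in the earlier sketch (illustrative, not part of the vendored header):

    #include <openssl/bio.h>
    #include <openssl/ct.h>

    /* Validate every SCT in |scts| against |ctx| and print each verdict. */
    static int check_scts(STACK_OF(SCT) *scts, CT_POLICY_EVAL_CTX *ctx, BIO *out)
    {
        int i, ret = SCT_LIST_validate(scts, ctx);

        for (i = 0; i < sk_SCT_num(scts); i++) {
            SCT *sct = sk_SCT_value(scts, i);

            BIO_printf(out, "SCT %d: %s\n", i,
                       SCT_validation_status_string(sct));
        }
        return ret == 1;
    }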
+
+
+/*********************************
+ * SCT parsing and serialization *
+ *********************************/
+
+/*
+ * Serialize (to TLS format) a stack of SCTs and return the length.
+ * "a" must not be NULL.
+ * If "pp" is NULL, just return the length of what would have been serialized.
+ * If "pp" is not NULL and "*pp" is null, function will allocate a new pointer
+ * for data that caller is responsible for freeing (only if function returns
+ * successfully).
+ * If "pp" is NULL and "*pp" is not NULL, caller is responsible for ensuring
+ * that "*pp" is large enough to accept all of the serialized data.
+ * Returns < 0 on error, >= 0 indicating bytes written (or would have been)
+ * on success.
+ */
+__owur int i2o_SCT_LIST(const STACK_OF(SCT) *a, unsigned char **pp);
+
+/*
+ * Convert TLS format SCT list to a stack of SCTs.
+ * If "a" or "*a" is NULL, a new stack will be created that the caller is
+ * responsible for freeing (by calling SCT_LIST_free).
+ * "**pp" and "*pp" must not be NULL.
+ * Upon success, "*pp" will point to after the last bytes read, and a stack
+ * will be returned.
+ * Upon failure, a NULL pointer will be returned, and the position of "*pp" is
+ * not defined.
+ */
+STACK_OF(SCT) *o2i_SCT_LIST(STACK_OF(SCT) **a, const unsigned char **pp,
+                            size_t len);
+
+/*
+ * Serialize (to DER format) a stack of SCTs and return the length.
+ * "a" must not be NULL.
+ * If "pp" is NULL, just returns the length of what would have been serialized.
+ * If "pp" is not NULL and "*pp" is null, function will allocate a new pointer
+ * for data that caller is responsible for freeing (only if function returns
+ * successfully).
+ * If "pp" is NULL and "*pp" is not NULL, caller is responsible for ensuring
+ * that "*pp" is large enough to accept all of the serialized data.
+ * Returns < 0 on error, >= 0 indicating bytes written (or would have been)
+ * on success.
+ */
+__owur int i2d_SCT_LIST(const STACK_OF(SCT) *a, unsigned char **pp);
+
+/*
+ * Parses an SCT list in DER format and returns it.
+ * If "a" or "*a" is NULL, a new stack will be created that the caller is
+ * responsible for freeing (by calling SCT_LIST_free).
+ * "**pp" and "*pp" must not be NULL.
+ * Upon success, "*pp" will point to after the last bytes read, and a stack
+ * will be returned.
+ * Upon failure, a NULL pointer will be returned, and the position of "*pp" is
+ * not defined.
+ */
+STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a, const unsigned char **pp,
+                            long len);
+
+/*
+ * Serialize (to TLS format) an |sct| and write it to |out|.
+ * If |out| is null, no SCT will be output but the length will still be returned.
+ * If |out| points to a null pointer, a string will be allocated to hold the
+ * TLS-format SCT. It is the responsibility of the caller to free it.
+ * If |out| points to an allocated string, the TLS-format SCT will be written
+ * to it.
+ * The length of the SCT in TLS format will be returned.
+ */
+__owur int i2o_SCT(const SCT *sct, unsigned char **out);
+
+/*
+ * Parses an SCT in TLS format and returns it.
+ * If |psct| is not null, it will end up pointing to the parsed SCT. If it
+ * already points to a non-null pointer, the pointer will be free'd.
+ * |in| should be a pointer to a string containing the TLS-format SCT.
+ * |in| will be advanced to the end of the SCT if parsing succeeds.
+ * |len| should be the length of the SCT in |in|.
+ * Returns NULL if an error occurs.
+ * If the SCT is an unsupported version, only the SCT's 'sct' and 'sct_len'
+ * fields will be populated (with |in| and |len| respectively).
+ */
+SCT *o2i_SCT(SCT **psct, const unsigned char **in, size_t len);
+
+/********************
+ * CT log functions *
+ ********************/
+
+/*
+ * Creates a new CT log instance with the given |public_key| and |name| and
+ * associates it with the given library context |libctx| and property query
+ * string |propq|.
+ * Takes ownership of |public_key| but copies |name|.
+ * Returns NULL if malloc fails or if |public_key| cannot be converted to DER.
+ * Should be deleted by the caller using CTLOG_free when no longer needed.
+ */
+CTLOG *CTLOG_new_ex(EVP_PKEY *public_key, const char *name, OSSL_LIB_CTX *libctx,
+                    const char *propq);
+
+/*
+ * The same as CTLOG_new_ex except that the default library context and
+ * property query string are used.
+ */
+CTLOG *CTLOG_new(EVP_PKEY *public_key, const char *name);
+
+/*
+ * Creates a new CTLOG instance with the base64-encoded SubjectPublicKeyInfo DER
+ * in |pkey_base64| and associated with the given library context |libctx| and
+ * property query string |propq|. The |name| is a string to help users identify
+ * this log.
+ * Returns 1 on success, 0 on failure.
+ * Should be deleted by the caller using CTLOG_free when no longer needed.
+ */
+int CTLOG_new_from_base64_ex(CTLOG **ct_log, const char *pkey_base64,
+                             const char *name, OSSL_LIB_CTX *libctx,
+                             const char *propq);
+
+/*
+ * The same as CTLOG_new_from_base64_ex() except that the default
+ * library context and property query string are used.
+ * Returns 1 on success, 0 on failure.
+ */
+int CTLOG_new_from_base64(CTLOG ** ct_log,
+                          const char *pkey_base64, const char *name);
+
+/*
+ * Deletes a CT log instance and its fields.
+ */
+void CTLOG_free(CTLOG *log);
+
+/* Gets the name of the CT log */
+const char *CTLOG_get0_name(const CTLOG *log);
+/* Gets the ID of the CT log */
+void CTLOG_get0_log_id(const CTLOG *log, const uint8_t **log_id,
+                       size_t *log_id_len);
+/* Gets the public key of the CT log */
+EVP_PKEY *CTLOG_get0_public_key(const CTLOG *log);
+
+/**************************
+ * CT log store functions *
+ **************************/
+
+/*
+ * Creates a new CT log store and associates it with the given libctx and
+ * property query string.
+ * Should be deleted by the caller using CTLOG_STORE_free when no longer needed.
+ */
+CTLOG_STORE *CTLOG_STORE_new_ex(OSSL_LIB_CTX *libctx, const char *propq);
+
+/*
+ * Same as CTLOG_STORE_new_ex except that the default libctx and
+ * property query string are used.
+ * Should be deleted by the caller using CTLOG_STORE_free when no longer needed.
+ */
+CTLOG_STORE *CTLOG_STORE_new(void);
+
+/*
+ * Deletes a CT log store and all of the CT log instances held within.
+ */
+void CTLOG_STORE_free(CTLOG_STORE *store);
+
+/*
+ * Finds a CT log in the store based on its log ID.
+ * Returns the CT log, or NULL if no match is found.
+ */
+const CTLOG *CTLOG_STORE_get0_log_by_id(const CTLOG_STORE *store,
+                                        const uint8_t *log_id,
+                                        size_t log_id_len);
+
+/*
+ * Loads a CT log list into a |store| from a |file|.
+ * Returns 1 if loading is successful, or 0 otherwise.
+ */
+__owur int CTLOG_STORE_load_file(CTLOG_STORE *store, const char *file);
+
+/*
+ * Loads the default CT log list into a |store|.
+ * Returns 1 if loading is successful, or 0 otherwise.
+ */
+__owur int CTLOG_STORE_load_default_file(CTLOG_STORE *store);
+
+#  ifdef __cplusplus
+}
+#  endif
+# endif
+#endif
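The store functions above in their most common form: load the library's default log list, for use with CT_POLICY_EVAL_CTX_set_shared_CTLOG_STORE() (illustrative sketch):

    #include <openssl/ct.h>

    static CTLOG_STORE *load_default_logs(void)
    {
        CTLOG_STORE *store = CTLOG_STORE_new();

        if (store != NULL && !CTLOG_STORE_load_default_file(store)) {
            CTLOG_STORE_free(store);
            return NULL;
        }
        return store;
    }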
diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/err.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/err.h
new file mode 100644
index 00000000000..b987e31f8c8
--- /dev/null
+++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/err.h
@@ -0,0 +1,511 @@
+/*
+ * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+
+
+#ifndef OPENSSL_ERR_H
+# define OPENSSL_ERR_H
+# pragma once
+
+# include
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+#  define HEADER_ERR_H
+# endif
+
+# include
+
+# ifndef OPENSSL_NO_STDIO
+#  include
+#  include
+# endif
+
+# include
+# include
+# include
+# include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+#  ifndef OPENSSL_NO_FILENAMES
+#   define ERR_PUT_error(l,f,r,fn,ln)      ERR_put_error(l,f,r,fn,ln)
+#  else
+#   define ERR_PUT_error(l,f,r,fn,ln)      ERR_put_error(l,f,r,NULL,0)
+#  endif
+# endif
+
+# include
+# include
+
+# define ERR_TXT_MALLOCED        0x01
+# define ERR_TXT_STRING          0x02
+
+# if !defined(OPENSSL_NO_DEPRECATED_3_0) || defined(OSSL_FORCE_ERR_STATE)
+#  define ERR_FLAG_MARK           0x01
+#  define ERR_FLAG_CLEAR          0x02
+
+#  define ERR_NUM_ERRORS  16
+struct err_state_st {
+    int err_flags[ERR_NUM_ERRORS];
+    int err_marks[ERR_NUM_ERRORS];
+    unsigned long err_buffer[ERR_NUM_ERRORS];
+    char *err_data[ERR_NUM_ERRORS];
+    size_t err_data_size[ERR_NUM_ERRORS];
+    int err_data_flags[ERR_NUM_ERRORS];
+    char *err_file[ERR_NUM_ERRORS];
+    int err_line[ERR_NUM_ERRORS];
+    char *err_func[ERR_NUM_ERRORS];
+    int top, bottom;
+};
+# endif
+
+/* library */
+# define ERR_LIB_NONE            1
+# define ERR_LIB_SYS             2
+# define ERR_LIB_BN              3
+# define ERR_LIB_RSA             4
+# define ERR_LIB_DH              5
+# define ERR_LIB_EVP             6
+# define ERR_LIB_BUF             7
+# define ERR_LIB_OBJ             8
+# define ERR_LIB_PEM             9
+# define ERR_LIB_DSA             10
+# define ERR_LIB_X509            11
+/* #define ERR_LIB_METH         12 */
+# define ERR_LIB_ASN1            13
+# define ERR_LIB_CONF            14
+# define ERR_LIB_CRYPTO          15
+# define ERR_LIB_EC              16
+# define ERR_LIB_SSL             20
+/* #define ERR_LIB_SSL23        21 */
+/* #define ERR_LIB_SSL2         22 */
+/* #define ERR_LIB_SSL3         23 */
+/* #define ERR_LIB_RSAREF       30 */
+/* #define ERR_LIB_PROXY        31 */
+# define ERR_LIB_BIO             32
+# define ERR_LIB_PKCS7           33
+# define ERR_LIB_X509V3          34
+# define ERR_LIB_PKCS12          35
+# define ERR_LIB_RAND            36
+# define ERR_LIB_DSO             37
+# define ERR_LIB_ENGINE          38
+# define ERR_LIB_OCSP            39
+# define ERR_LIB_UI              40
+# define ERR_LIB_COMP            41
+# define ERR_LIB_ECDSA           42
+# define ERR_LIB_ECDH            43
+# define ERR_LIB_OSSL_STORE      44
+# define ERR_LIB_FIPS            45
+# define ERR_LIB_CMS             46
+# define ERR_LIB_TS              47
+# define ERR_LIB_HMAC            48
+/* # define ERR_LIB_JPAKE       49 */
+# define ERR_LIB_CT              50
+# define ERR_LIB_ASYNC           51
+# define ERR_LIB_KDF             52
+# define ERR_LIB_SM2             53
+# define ERR_LIB_ESS             54
+# define ERR_LIB_PROP            55
+# define ERR_LIB_CRMF            56
+# define ERR_LIB_PROV            57
+# define ERR_LIB_CMP             58
+# define ERR_LIB_OSSL_ENCODER    59
+# define ERR_LIB_OSSL_DECODER    60
+# define ERR_LIB_HTTP            61
+
+# define ERR_LIB_USER            128
+
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+#  define ASN1err(f, r) ERR_raise_data(ERR_LIB_ASN1, (r), NULL)
+#  define ASYNCerr(f, r) ERR_raise_data(ERR_LIB_ASYNC, (r), NULL)
+#  define BIOerr(f, r) ERR_raise_data(ERR_LIB_BIO, (r), NULL)
+#  define BNerr(f, r) ERR_raise_data(ERR_LIB_BN, (r), NULL)
+#  define BUFerr(f, r) ERR_raise_data(ERR_LIB_BUF, (r), NULL)
+#  define CMPerr(f, r) ERR_raise_data(ERR_LIB_CMP, (r), NULL)
+#  define CMSerr(f, r) ERR_raise_data(ERR_LIB_CMS, (r), NULL)
+#  define COMPerr(f, r) ERR_raise_data(ERR_LIB_COMP, (r), NULL)
+#  define CONFerr(f, r) ERR_raise_data(ERR_LIB_CONF, (r), NULL)
+#  define CRMFerr(f, r) ERR_raise_data(ERR_LIB_CRMF, (r), NULL)
+#  define CRYPTOerr(f, r) ERR_raise_data(ERR_LIB_CRYPTO, (r), NULL)
+#  define CTerr(f, r) ERR_raise_data(ERR_LIB_CT, (r), NULL)
+#  define DHerr(f, r) ERR_raise_data(ERR_LIB_DH, (r), NULL)
+#  define DSAerr(f, r) ERR_raise_data(ERR_LIB_DSA, (r), NULL)
+#  define DSOerr(f, r) ERR_raise_data(ERR_LIB_DSO, (r), NULL)
+#  define ECDHerr(f, r) ERR_raise_data(ERR_LIB_ECDH, (r), NULL)
+#  define ECDSAerr(f, r) ERR_raise_data(ERR_LIB_ECDSA, (r), NULL)
+#  define ECerr(f, r) ERR_raise_data(ERR_LIB_EC, (r), NULL)
+#  define ENGINEerr(f, r) ERR_raise_data(ERR_LIB_ENGINE, (r), NULL)
+#  define ESSerr(f, r) ERR_raise_data(ERR_LIB_ESS, (r), NULL)
+#  define EVPerr(f, r) ERR_raise_data(ERR_LIB_EVP, (r), NULL)
+#  define FIPSerr(f, r) ERR_raise_data(ERR_LIB_FIPS, (r), NULL)
+#  define HMACerr(f, r) ERR_raise_data(ERR_LIB_HMAC, (r), NULL)
+#  define HTTPerr(f, r) ERR_raise_data(ERR_LIB_HTTP, (r), NULL)
+#  define KDFerr(f, r) ERR_raise_data(ERR_LIB_KDF, (r), NULL)
+#  define OBJerr(f, r) ERR_raise_data(ERR_LIB_OBJ, (r), NULL)
+#  define OCSPerr(f, r) ERR_raise_data(ERR_LIB_OCSP, (r), NULL)
+#  define OSSL_STOREerr(f, r) ERR_raise_data(ERR_LIB_OSSL_STORE, (r), NULL)
+#  define PEMerr(f, r) ERR_raise_data(ERR_LIB_PEM, (r), NULL)
+#  define PKCS12err(f, r) ERR_raise_data(ERR_LIB_PKCS12, (r), NULL)
+#  define PKCS7err(f, r) ERR_raise_data(ERR_LIB_PKCS7, (r), NULL)
+#  define PROPerr(f, r) ERR_raise_data(ERR_LIB_PROP, (r), NULL)
+#  define PROVerr(f, r) ERR_raise_data(ERR_LIB_PROV, (r), NULL)
+#  define RANDerr(f, r) ERR_raise_data(ERR_LIB_RAND, (r), NULL)
+#  define RSAerr(f, r) ERR_raise_data(ERR_LIB_RSA, (r), NULL)
+#  define KDFerr(f, r) ERR_raise_data(ERR_LIB_KDF, (r), NULL)
+#  define SM2err(f, r) ERR_raise_data(ERR_LIB_SM2, (r), NULL)
+#  define SSLerr(f, r) ERR_raise_data(ERR_LIB_SSL, (r), NULL)
+#  define SYSerr(f, r) ERR_raise_data(ERR_LIB_SYS, (r), NULL)
+#  define TSerr(f, r) ERR_raise_data(ERR_LIB_TS, (r), NULL)
+#  define UIerr(f, r) ERR_raise_data(ERR_LIB_UI, (r), NULL)
+#  define X509V3err(f, r) ERR_raise_data(ERR_LIB_X509V3, (r), NULL)
+#  define X509err(f, r) ERR_raise_data(ERR_LIB_X509, (r), NULL)
+# endif
+
+/*-
+ * The error code packs differently depending on if it records a system
+ * error or an OpenSSL error.
+ *
+ * A system error packs like this (we follow POSIX and only allow positive
+ * numbers that fit in an |int|):
+ *
+ * +-+-------------------------------------------------------------+
+ * |1|                     system error number                     |
+ * +-+-------------------------------------------------------------+
+ *
+ * An OpenSSL error packs like this:
+ *
+ * <---------------------------- 32 bits -------------------------->
+ *    <--- 8 bits ---><------------------ 23 bits ----------------->
+ * +-+---------------+---------------------------------------------+
+ * |0|    library    |                    reason                   |
+ * +-+---------------+---------------------------------------------+
+ *
+ * A few of the reason bits are reserved as flags with special meaning:
+ *
+ *                    <5 bits-<>--------- 19 bits ----------------->
+ *                   +-------+-+-----------------------------------+
+ *                   | rflags| |             reason                |
+ *                   +-------+-+-----------------------------------+
+ *                            ^
+ *                            |
+ *                           ERR_RFLAG_FATAL = ERR_R_FATAL
+ *
+ * The reason flags are part of the overall reason code for practical
+ * reasons, as they provide an easy way to place different types of
+ * reason codes in different numeric ranges.
+ *
+ * The currently known reason flags are:
+ *
+ * ERR_RFLAG_FATAL      Flags that the reason code is considered fatal.
+ *                      For backward compatibility reasons, this flag
+ *                      is also the code for ERR_R_FATAL (that reason
+ *                      code served the dual purpose of flag and reason
+ *                      code in one in pre-3.0 OpenSSL).
+ * ERR_RFLAG_COMMON     Flags that the reason code is common to all
+ *                      libraries.  All ERR_R_ macros must use this flag,
+ *                      and no other _R_ macro is allowed to use it.
+ */
+
+/* Macros to help decode recorded system errors */
+# define ERR_SYSTEM_FLAG                ((unsigned int)INT_MAX + 1)
+# define ERR_SYSTEM_MASK                ((unsigned int)INT_MAX)
+
+/*
+ * Macros to help decode recorded OpenSSL errors
+ * As expressed above, RFLAGS and REASON overlap by one bit to allow
+ * ERR_R_FATAL to use ERR_RFLAG_FATAL as its reason code.
+ */
+# define ERR_LIB_OFFSET                 23L
+# define ERR_LIB_MASK                   0xFF
+# define ERR_RFLAGS_OFFSET              18L
+# define ERR_RFLAGS_MASK                0x1F
+# define ERR_REASON_MASK                0X7FFFFF
+
+/*
+ * Reason flags are defined pre-shifted to easily combine with the reason
+ * number.
+ */
+# define ERR_RFLAG_FATAL                (0x1 << ERR_RFLAGS_OFFSET)
+# define ERR_RFLAG_COMMON               (0x2 << ERR_RFLAGS_OFFSET)
+
+# define ERR_SYSTEM_ERROR(errcode)      (((errcode) & ERR_SYSTEM_FLAG) != 0)
+
+static ossl_unused ossl_inline int ERR_GET_LIB(unsigned long errcode)
+{
+    if (ERR_SYSTEM_ERROR(errcode))
+        return ERR_LIB_SYS;
+    return (errcode >> ERR_LIB_OFFSET) & ERR_LIB_MASK;
+}
+
+static ossl_unused ossl_inline int ERR_GET_RFLAGS(unsigned long errcode)
+{
+    if (ERR_SYSTEM_ERROR(errcode))
+        return 0;
+    return errcode & (ERR_RFLAGS_MASK << ERR_RFLAGS_OFFSET);
+}
+
+static ossl_unused ossl_inline int ERR_GET_REASON(unsigned long errcode)
+{
+    if (ERR_SYSTEM_ERROR(errcode))
+        return errcode & ERR_SYSTEM_MASK;
+    return errcode & ERR_REASON_MASK;
+}
+
+static ossl_unused ossl_inline int ERR_FATAL_ERROR(unsigned long errcode)
+{
+    return (ERR_GET_RFLAGS(errcode) & ERR_RFLAG_FATAL) != 0;
+}
+
+static ossl_unused ossl_inline int ERR_COMMON_ERROR(unsigned long errcode)
+{
+    return (ERR_GET_RFLAGS(errcode) & ERR_RFLAG_COMMON) != 0;
+}
+
+/*
+ * ERR_PACK is a helper macro to properly pack OpenSSL error codes and may
+ * only be used for that purpose.  System errors are packed internally.
+ * ERR_PACK takes reason flags and reason code combined in |reason|.
+ * ERR_PACK ignores |func|, that parameter is just legacy from pre-3.0 OpenSSL.
+ */
+# define ERR_PACK(lib,func,reason)                                      \
+    ( (((unsigned long)(lib)    & ERR_LIB_MASK   ) << ERR_LIB_OFFSET) | \
+      (((unsigned long)(reason) & ERR_REASON_MASK)) )
+
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+#  define SYS_F_FOPEN             0
+#  define SYS_F_CONNECT           0
+#  define SYS_F_GETSERVBYNAME     0
+#  define SYS_F_SOCKET            0
+#  define SYS_F_IOCTLSOCKET       0
+#  define SYS_F_BIND              0
+#  define SYS_F_LISTEN            0
+#  define SYS_F_ACCEPT            0
+#  define SYS_F_WSASTARTUP        0
+#  define SYS_F_OPENDIR           0
+#  define SYS_F_FREAD             0
+#  define SYS_F_GETADDRINFO       0
+#  define SYS_F_GETNAMEINFO       0
+#  define SYS_F_SETSOCKOPT        0
+#  define SYS_F_GETSOCKOPT        0
+#  define SYS_F_GETSOCKNAME       0
+#  define SYS_F_GETHOSTBYNAME     0
+#  define SYS_F_FFLUSH            0
+#  define SYS_F_OPEN              0
+#  define SYS_F_CLOSE             0
+#  define SYS_F_IOCTL             0
+#  define SYS_F_STAT              0
+#  define SYS_F_FCNTL             0
+#  define SYS_F_FSTAT             0
+#  define SYS_F_SENDFILE          0
+# endif
+
+/*
+ * All ERR_R_ codes must be combined with ERR_RFLAG_COMMON.
+ */
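Decoding queued errors with the inline helpers above; ERR_get_error() and ERR_reason_error_string() are declared further down in this header (illustrative sketch):

    #include <stdio.h>
    #include <openssl/err.h>

    /* Drain the error queue, splitting each code into library and reason. */
    static void dump_errors(void)
    {
        unsigned long e;

        while ((e = ERR_get_error()) != 0) {
            const char *reason = ERR_reason_error_string(e);

            fprintf(stderr, "lib=%d reason=%d fatal=%d: %s\n",
                    ERR_GET_LIB(e), ERR_GET_REASON(e), ERR_FATAL_ERROR(e),
                    reason != NULL ? reason : "unknown");
        }
    }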
+ */ + +/* "we came from here" global reason codes, range 1..255 */ +# define ERR_R_SYS_LIB (ERR_LIB_SYS/* 2 */ | ERR_RFLAG_COMMON) +# define ERR_R_BN_LIB (ERR_LIB_BN/* 3 */ | ERR_RFLAG_COMMON) +# define ERR_R_RSA_LIB (ERR_LIB_RSA/* 4 */ | ERR_RFLAG_COMMON) +# define ERR_R_DH_LIB (ERR_LIB_DH/* 5 */ | ERR_RFLAG_COMMON) +# define ERR_R_EVP_LIB (ERR_LIB_EVP/* 6 */ | ERR_RFLAG_COMMON) +# define ERR_R_BUF_LIB (ERR_LIB_BUF/* 7 */ | ERR_RFLAG_COMMON) +# define ERR_R_OBJ_LIB (ERR_LIB_OBJ/* 8 */ | ERR_RFLAG_COMMON) +# define ERR_R_PEM_LIB (ERR_LIB_PEM/* 9 */ | ERR_RFLAG_COMMON) +# define ERR_R_DSA_LIB (ERR_LIB_DSA/* 10 */ | ERR_RFLAG_COMMON) +# define ERR_R_X509_LIB (ERR_LIB_X509/* 11 */ | ERR_RFLAG_COMMON) +# define ERR_R_ASN1_LIB (ERR_LIB_ASN1/* 13 */ | ERR_RFLAG_COMMON) +# define ERR_R_CONF_LIB (ERR_LIB_CONF/* 14 */ | ERR_RFLAG_COMMON) +# define ERR_R_CRYPTO_LIB (ERR_LIB_CRYPTO/* 15 */ | ERR_RFLAG_COMMON) +# define ERR_R_EC_LIB (ERR_LIB_EC/* 16 */ | ERR_RFLAG_COMMON) +# define ERR_R_SSL_LIB (ERR_LIB_SSL/* 20 */ | ERR_RFLAG_COMMON) +# define ERR_R_BIO_LIB (ERR_LIB_BIO/* 32 */ | ERR_RFLAG_COMMON) +# define ERR_R_PKCS7_LIB (ERR_LIB_PKCS7/* 33 */ | ERR_RFLAG_COMMON) +# define ERR_R_X509V3_LIB (ERR_LIB_X509V3/* 34 */ | ERR_RFLAG_COMMON) +# define ERR_R_PKCS12_LIB (ERR_LIB_PKCS12/* 35 */ | ERR_RFLAG_COMMON) +# define ERR_R_RAND_LIB (ERR_LIB_RAND/* 36 */ | ERR_RFLAG_COMMON) +# define ERR_R_DSO_LIB (ERR_LIB_DSO/* 37 */ | ERR_RFLAG_COMMON) +# define ERR_R_ENGINE_LIB (ERR_LIB_ENGINE/* 38 */ | ERR_RFLAG_COMMON) +# define ERR_R_UI_LIB (ERR_LIB_UI/* 40 */ | ERR_RFLAG_COMMON) +# define ERR_R_ECDSA_LIB (ERR_LIB_ECDSA/* 42 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_STORE_LIB (ERR_LIB_OSSL_STORE/* 44 */ | ERR_RFLAG_COMMON) +# define ERR_R_CMS_LIB (ERR_LIB_CMS/* 46 */ | ERR_RFLAG_COMMON) +# define ERR_R_TS_LIB (ERR_LIB_TS/* 47 */ | ERR_RFLAG_COMMON) +# define ERR_R_CT_LIB (ERR_LIB_CT/* 50 */ | ERR_RFLAG_COMMON) +# define ERR_R_PROV_LIB (ERR_LIB_PROV/* 57 */ | ERR_RFLAG_COMMON) +# define ERR_R_ESS_LIB (ERR_LIB_ESS/* 54 */ | ERR_RFLAG_COMMON) +# define ERR_R_CMP_LIB (ERR_LIB_CMP/* 58 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_ENCODER_LIB (ERR_LIB_OSSL_ENCODER/* 59 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_DECODER_LIB (ERR_LIB_OSSL_DECODER/* 60 */ | ERR_RFLAG_COMMON) + +/* Other common error codes, range 256..2^ERR_RFLAGS_OFFSET-1 */ +# define ERR_R_FATAL (ERR_RFLAG_FATAL|ERR_RFLAG_COMMON) +# define ERR_R_MALLOC_FAILURE (256|ERR_R_FATAL) +# define ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED (257|ERR_R_FATAL) +# define ERR_R_PASSED_NULL_PARAMETER (258|ERR_R_FATAL) +# define ERR_R_INTERNAL_ERROR (259|ERR_R_FATAL) +# define ERR_R_DISABLED (260|ERR_R_FATAL) +# define ERR_R_INIT_FAIL (261|ERR_R_FATAL) +# define ERR_R_PASSED_INVALID_ARGUMENT (262|ERR_RFLAG_COMMON) +# define ERR_R_OPERATION_FAIL (263|ERR_R_FATAL) +# define ERR_R_INVALID_PROVIDER_FUNCTIONS (264|ERR_R_FATAL) +# define ERR_R_INTERRUPTED_OR_CANCELLED (265|ERR_RFLAG_COMMON) +# define ERR_R_NESTED_ASN1_ERROR (266|ERR_RFLAG_COMMON) +# define ERR_R_MISSING_ASN1_EOS (267|ERR_RFLAG_COMMON) +# define ERR_R_UNSUPPORTED (268|ERR_RFLAG_COMMON) +# define ERR_R_FETCH_FAILED (269|ERR_RFLAG_COMMON) +# define ERR_R_INVALID_PROPERTY_DEFINITION (270|ERR_RFLAG_COMMON) +# define ERR_R_UNABLE_TO_GET_READ_LOCK (271|ERR_R_FATAL) +# define ERR_R_UNABLE_TO_GET_WRITE_LOCK (272|ERR_R_FATAL) + +typedef struct ERR_string_data_st { + unsigned long error; + const char *string; +} ERR_STRING_DATA; + +DEFINE_LHASH_OF_INTERNAL(ERR_STRING_DATA); +#define lh_ERR_STRING_DATA_new(hfn, cmp) 
((LHASH_OF(ERR_STRING_DATA) *)OPENSSL_LH_new(ossl_check_ERR_STRING_DATA_lh_hashfunc_type(hfn), ossl_check_ERR_STRING_DATA_lh_compfunc_type(cmp))) +#define lh_ERR_STRING_DATA_free(lh) OPENSSL_LH_free(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_flush(lh) OPENSSL_LH_flush(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_insert(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_insert(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_delete(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_delete(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_const_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_retrieve(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_retrieve(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_const_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_error(lh) OPENSSL_LH_error(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_num_items(lh) OPENSSL_LH_num_items(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_ERR_STRING_DATA_lh_type(lh), dl) +#define lh_ERR_STRING_DATA_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_ERR_STRING_DATA_lh_doallfunc_type(dfn)) + + +/* 12 lines and some on an 80 column terminal */ +#define ERR_MAX_DATA_SIZE 1024 + +/* Building blocks */ +void ERR_new(void); +void ERR_set_debug(const char *file, int line, const char *func); +void ERR_set_error(int lib, int reason, const char *fmt, ...); +void ERR_vset_error(int lib, int reason, const char *fmt, va_list args); + +/* Main error raising functions */ +# define ERR_raise(lib, reason) ERR_raise_data((lib),(reason),NULL) +# define ERR_raise_data \ + (ERR_new(), \ + ERR_set_debug(OPENSSL_FILE,OPENSSL_LINE,OPENSSL_FUNC), \ + ERR_set_error) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* Backward compatibility */ +# define ERR_put_error(lib, func, reason, file, line) \ + (ERR_new(), \ + ERR_set_debug((file), (line), OPENSSL_FUNC), \ + ERR_set_error((lib), (reason), NULL)) +# endif + +void ERR_set_error_data(char *data, int flags); + +unsigned long ERR_get_error(void); +unsigned long ERR_get_error_all(const char **file, int *line, + const char **func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_get_error_line(const char **file, int *line); +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_get_error_line_data(const char **file, int *line, + const char **data, int *flags); +#endif +unsigned long ERR_peek_error(void); +unsigned long ERR_peek_error_line(const char **file, int *line); +unsigned long ERR_peek_error_func(const char **func); +unsigned long ERR_peek_error_data(const char **data, int *flags); +unsigned long ERR_peek_error_all(const char **file, int *line, + const char **func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_peek_error_line_data(const char 
**file, int *line, + const char **data, int *flags); +# endif +unsigned long ERR_peek_last_error(void); +unsigned long ERR_peek_last_error_line(const char **file, int *line); +unsigned long ERR_peek_last_error_func(const char **func); +unsigned long ERR_peek_last_error_data(const char **data, int *flags); +unsigned long ERR_peek_last_error_all(const char **file, int *line, + const char **func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_peek_last_error_line_data(const char **file, int *line, + const char **data, int *flags); +# endif + +void ERR_clear_error(void); + +char *ERR_error_string(unsigned long e, char *buf); +void ERR_error_string_n(unsigned long e, char *buf, size_t len); +const char *ERR_lib_error_string(unsigned long e); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 const char *ERR_func_error_string(unsigned long e); +# endif +const char *ERR_reason_error_string(unsigned long e); + +void ERR_print_errors_cb(int (*cb) (const char *str, size_t len, void *u), + void *u); +# ifndef OPENSSL_NO_STDIO +void ERR_print_errors_fp(FILE *fp); +# endif +void ERR_print_errors(BIO *bp); + +void ERR_add_error_data(int num, ...); +void ERR_add_error_vdata(int num, va_list args); +void ERR_add_error_txt(const char *sepr, const char *txt); +void ERR_add_error_mem_bio(const char *sep, BIO *bio); + +int ERR_load_strings(int lib, ERR_STRING_DATA *str); +int ERR_load_strings_const(const ERR_STRING_DATA *str); +int ERR_unload_strings(int lib, ERR_STRING_DATA *str); + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define ERR_load_crypto_strings() \ + OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL) +# define ERR_free_strings() while(0) continue +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 void ERR_remove_thread_state(void *); +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_0_0 +OSSL_DEPRECATEDIN_1_0_0 void ERR_remove_state(unsigned long pid); +#endif +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 ERR_STATE *ERR_get_state(void); +#endif + +int ERR_get_next_error_library(void); + +int ERR_set_mark(void); +int ERR_pop_to_mark(void); +int ERR_clear_last_mark(void); +int ERR_count_to_mark(void); + +ERR_STATE *OSSL_ERR_STATE_new(void); +void OSSL_ERR_STATE_save(ERR_STATE *es); +void OSSL_ERR_STATE_save_to_mark(ERR_STATE *es); +void OSSL_ERR_STATE_restore(const ERR_STATE *es); +void OSSL_ERR_STATE_free(ERR_STATE *es); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ess.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ess.h new file mode 100644 index 00000000000..4055bebbea2 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ess.h @@ -0,0 +1,128 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ess.h.in + * + * Copyright 2019-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+
+
+#ifndef OPENSSL_ESS_H
+# define OPENSSL_ESS_H
+# pragma once
+
+# include <openssl/opensslconf.h>
+
+# include <openssl/safestack.h>
+# include <openssl/x509.h>
+# include <openssl/types.h>
+
+# ifdef __cplusplus
+extern "C" {
+# endif
+
+
+typedef struct ESS_issuer_serial ESS_ISSUER_SERIAL;
+typedef struct ESS_cert_id ESS_CERT_ID;
+typedef struct ESS_signing_cert ESS_SIGNING_CERT;
+
+SKM_DEFINE_STACK_OF_INTERNAL(ESS_CERT_ID, ESS_CERT_ID, ESS_CERT_ID)
+#define sk_ESS_CERT_ID_num(sk) OPENSSL_sk_num(ossl_check_const_ESS_CERT_ID_sk_type(sk))
+#define sk_ESS_CERT_ID_value(sk, idx) ((ESS_CERT_ID *)OPENSSL_sk_value(ossl_check_const_ESS_CERT_ID_sk_type(sk), (idx)))
+#define sk_ESS_CERT_ID_new(cmp) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new(ossl_check_ESS_CERT_ID_compfunc_type(cmp)))
+#define sk_ESS_CERT_ID_new_null() ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new_null())
+#define sk_ESS_CERT_ID_new_reserve(cmp, n) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new_reserve(ossl_check_ESS_CERT_ID_compfunc_type(cmp), (n)))
+#define sk_ESS_CERT_ID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ESS_CERT_ID_sk_type(sk), (n))
+#define sk_ESS_CERT_ID_free(sk) OPENSSL_sk_free(ossl_check_ESS_CERT_ID_sk_type(sk))
+#define sk_ESS_CERT_ID_zero(sk) OPENSSL_sk_zero(ossl_check_ESS_CERT_ID_sk_type(sk))
+#define sk_ESS_CERT_ID_delete(sk, i) ((ESS_CERT_ID *)OPENSSL_sk_delete(ossl_check_ESS_CERT_ID_sk_type(sk), (i)))
+#define sk_ESS_CERT_ID_delete_ptr(sk, ptr) ((ESS_CERT_ID *)OPENSSL_sk_delete_ptr(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)))
+#define sk_ESS_CERT_ID_push(sk, ptr) OPENSSL_sk_push(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr))
+#define sk_ESS_CERT_ID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr))
+#define sk_ESS_CERT_ID_pop(sk) ((ESS_CERT_ID *)OPENSSL_sk_pop(ossl_check_ESS_CERT_ID_sk_type(sk)))
+#define sk_ESS_CERT_ID_shift(sk) ((ESS_CERT_ID *)OPENSSL_sk_shift(ossl_check_ESS_CERT_ID_sk_type(sk)))
+#define sk_ESS_CERT_ID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ESS_CERT_ID_sk_type(sk),ossl_check_ESS_CERT_ID_freefunc_type(freefunc))
+#define sk_ESS_CERT_ID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr), (idx))
+#define sk_ESS_CERT_ID_set(sk, idx, ptr) ((ESS_CERT_ID *)OPENSSL_sk_set(ossl_check_ESS_CERT_ID_sk_type(sk), (idx), ossl_check_ESS_CERT_ID_type(ptr)))
+#define sk_ESS_CERT_ID_find(sk, ptr) OPENSSL_sk_find(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr))
+#define sk_ESS_CERT_ID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr))
+#define sk_ESS_CERT_ID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr), pnum)
+#define sk_ESS_CERT_ID_sort(sk) OPENSSL_sk_sort(ossl_check_ESS_CERT_ID_sk_type(sk))
+#define sk_ESS_CERT_ID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ESS_CERT_ID_sk_type(sk))
+#define sk_ESS_CERT_ID_dup(sk) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_dup(ossl_check_const_ESS_CERT_ID_sk_type(sk)))
+#define sk_ESS_CERT_ID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_deep_copy(ossl_check_const_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_copyfunc_type(copyfunc), ossl_check_ESS_CERT_ID_freefunc_type(freefunc)))
+#define sk_ESS_CERT_ID_set_cmp_func(sk, cmp) 
((sk_ESS_CERT_ID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_compfunc_type(cmp))) + + + +typedef struct ESS_signing_cert_v2_st ESS_SIGNING_CERT_V2; +typedef struct ESS_cert_id_v2_st ESS_CERT_ID_V2; + +SKM_DEFINE_STACK_OF_INTERNAL(ESS_CERT_ID_V2, ESS_CERT_ID_V2, ESS_CERT_ID_V2) +#define sk_ESS_CERT_ID_V2_num(sk) OPENSSL_sk_num(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_value(sk, idx) ((ESS_CERT_ID_V2 *)OPENSSL_sk_value(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk), (idx))) +#define sk_ESS_CERT_ID_V2_new(cmp) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new(ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp))) +#define sk_ESS_CERT_ID_V2_new_null() ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new_null()) +#define sk_ESS_CERT_ID_V2_new_reserve(cmp, n) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new_reserve(ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp), (n))) +#define sk_ESS_CERT_ID_V2_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (n)) +#define sk_ESS_CERT_ID_V2_free(sk) OPENSSL_sk_free(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_zero(sk) OPENSSL_sk_zero(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_delete(sk, i) ((ESS_CERT_ID_V2 *)OPENSSL_sk_delete(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (i))) +#define sk_ESS_CERT_ID_V2_delete_ptr(sk, ptr) ((ESS_CERT_ID_V2 *)OPENSSL_sk_delete_ptr(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr))) +#define sk_ESS_CERT_ID_V2_push(sk, ptr) OPENSSL_sk_push(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_pop(sk) ((ESS_CERT_ID_V2 *)OPENSSL_sk_pop(ossl_check_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_shift(sk) ((ESS_CERT_ID_V2 *)OPENSSL_sk_shift(ossl_check_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ESS_CERT_ID_V2_sk_type(sk),ossl_check_ESS_CERT_ID_V2_freefunc_type(freefunc)) +#define sk_ESS_CERT_ID_V2_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr), (idx)) +#define sk_ESS_CERT_ID_V2_set(sk, idx, ptr) ((ESS_CERT_ID_V2 *)OPENSSL_sk_set(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (idx), ossl_check_ESS_CERT_ID_V2_type(ptr))) +#define sk_ESS_CERT_ID_V2_find(sk, ptr) OPENSSL_sk_find(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr), pnum) +#define sk_ESS_CERT_ID_V2_sort(sk) OPENSSL_sk_sort(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_dup(sk) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_dup(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_deep_copy(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_copyfunc_type(copyfunc), ossl_check_ESS_CERT_ID_V2_freefunc_type(freefunc))) +#define sk_ESS_CERT_ID_V2_set_cmp_func(sk, cmp) 
((sk_ESS_CERT_ID_V2_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp))) + + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_ISSUER_SERIAL) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_ISSUER_SERIAL, ESS_ISSUER_SERIAL) +DECLARE_ASN1_DUP_FUNCTION(ESS_ISSUER_SERIAL) + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_CERT_ID) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_CERT_ID, ESS_CERT_ID) +DECLARE_ASN1_DUP_FUNCTION(ESS_CERT_ID) + +DECLARE_ASN1_FUNCTIONS(ESS_SIGNING_CERT) +DECLARE_ASN1_DUP_FUNCTION(ESS_SIGNING_CERT) + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_CERT_ID_V2) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_CERT_ID_V2, ESS_CERT_ID_V2) +DECLARE_ASN1_DUP_FUNCTION(ESS_CERT_ID_V2) + +DECLARE_ASN1_FUNCTIONS(ESS_SIGNING_CERT_V2) +DECLARE_ASN1_DUP_FUNCTION(ESS_SIGNING_CERT_V2) + +ESS_SIGNING_CERT *OSSL_ESS_signing_cert_new_init(const X509 *signcert, + const STACK_OF(X509) *certs, + int set_issuer_serial); +ESS_SIGNING_CERT_V2 *OSSL_ESS_signing_cert_v2_new_init(const EVP_MD *hash_alg, + const X509 *signcert, + const + STACK_OF(X509) *certs, + int set_issuer_serial); +int OSSL_ESS_check_signing_certs(const ESS_SIGNING_CERT *ss, + const ESS_SIGNING_CERT_V2 *ssv2, + const STACK_OF(X509) *chain, + int require_signing_cert); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/fipskey.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/fipskey.h new file mode 100644 index 00000000000..42ba014b313 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/fipskey.h @@ -0,0 +1,36 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/fipskey.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_FIPSKEY_H +# define OPENSSL_FIPSKEY_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * The FIPS validation HMAC key, usable as an array initializer. + */ +#define FIPS_KEY_ELEMENTS \ + 0xf4, 0x55, 0x66, 0x50, 0xac, 0x31, 0xd3, 0x54, 0x61, 0x61, 0x0b, 0xac, 0x4e, 0xd8, 0x1b, 0x1a, 0x18, 0x1b, 0x2d, 0x8a, 0x43, 0xea, 0x28, 0x54, 0xcb, 0xae, 0x22, 0xca, 0x74, 0x56, 0x08, 0x13 + +/* + * The FIPS validation key, as a string. + */ +#define FIPS_KEY_STRING "f4556650ac31d35461610bac4ed81b1a181b2d8a43ea2854cbae22ca74560813" + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/lhash.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/lhash.h new file mode 100644 index 00000000000..8af9edd24a8 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/lhash.h @@ -0,0 +1,331 @@ +/* + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+
+
+/*
+ * Header for dynamic hash table routines Author - Eric Young
+ */
+
+#ifndef OPENSSL_LHASH_H
+# define OPENSSL_LHASH_H
+# pragma once
+
+# include <openssl/e_os2.h>
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+# define HEADER_LHASH_H
+# endif
+
+# include <openssl/bio.h>
+# include <openssl/types.h>
+# ifndef OPENSSL_NO_STDIO
+# include <stdio.h>
+# endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct lhash_node_st OPENSSL_LH_NODE;
+typedef int (*OPENSSL_LH_COMPFUNC) (const void *, const void *);
+typedef unsigned long (*OPENSSL_LH_HASHFUNC) (const void *);
+typedef void (*OPENSSL_LH_DOALL_FUNC) (void *);
+typedef void (*OPENSSL_LH_DOALL_FUNCARG) (void *, void *);
+typedef struct lhash_st OPENSSL_LHASH;
+
+/*
+ * Macros for declaring and implementing type-safe wrappers for LHASH
+ * callbacks. This way, callbacks can be provided to LHASH structures without
+ * function pointer casting and the macro-defined callbacks provide
+ * per-variable casting before deferring to the underlying type-specific
+ * callbacks. NB: It is possible to place a "static" in front of both the
+ * DECLARE and IMPLEMENT macros if the functions are strictly internal.
+ */
+
+/* First: "hash" functions */
+# define DECLARE_LHASH_HASH_FN(name, o_type) \
+ unsigned long name##_LHASH_HASH(const void *);
+# define IMPLEMENT_LHASH_HASH_FN(name, o_type) \
+ unsigned long name##_LHASH_HASH(const void *arg) { \
+ const o_type *a = arg; \
+ return name##_hash(a); }
+# define LHASH_HASH_FN(name) name##_LHASH_HASH
+
+/* Second: "compare" functions */
+# define DECLARE_LHASH_COMP_FN(name, o_type) \
+ int name##_LHASH_COMP(const void *, const void *);
+# define IMPLEMENT_LHASH_COMP_FN(name, o_type) \
+ int name##_LHASH_COMP(const void *arg1, const void *arg2) { \
+ const o_type *a = arg1; \
+ const o_type *b = arg2; \
+ return name##_cmp(a,b); }
+# define LHASH_COMP_FN(name) name##_LHASH_COMP
+
+/* Fourth: "doall_arg" functions */
+# define DECLARE_LHASH_DOALL_ARG_FN(name, o_type, a_type) \
+ void name##_LHASH_DOALL_ARG(void *, void *);
+# define IMPLEMENT_LHASH_DOALL_ARG_FN(name, o_type, a_type) \
+ void name##_LHASH_DOALL_ARG(void *arg1, void *arg2) { \
+ o_type *a = arg1; \
+ a_type *b = arg2; \
+ name##_doall_arg(a, b); }
+# define LHASH_DOALL_ARG_FN(name) name##_LHASH_DOALL_ARG
+
+
+# define LH_LOAD_MULT 256
+
+int OPENSSL_LH_error(OPENSSL_LHASH *lh);
+OPENSSL_LHASH *OPENSSL_LH_new(OPENSSL_LH_HASHFUNC h, OPENSSL_LH_COMPFUNC c);
+void OPENSSL_LH_free(OPENSSL_LHASH *lh);
+void OPENSSL_LH_flush(OPENSSL_LHASH *lh);
+void *OPENSSL_LH_insert(OPENSSL_LHASH *lh, void *data);
+void *OPENSSL_LH_delete(OPENSSL_LHASH *lh, const void *data);
+void *OPENSSL_LH_retrieve(OPENSSL_LHASH *lh, const void *data);
+void OPENSSL_LH_doall(OPENSSL_LHASH *lh, OPENSSL_LH_DOALL_FUNC func);
+void OPENSSL_LH_doall_arg(OPENSSL_LHASH *lh, OPENSSL_LH_DOALL_FUNCARG func, void *arg);
+unsigned long OPENSSL_LH_strhash(const char *c);
+unsigned long OPENSSL_LH_num_items(const OPENSSL_LHASH *lh);
+unsigned long OPENSSL_LH_get_down_load(const OPENSSL_LHASH *lh);
+void OPENSSL_LH_set_down_load(OPENSSL_LHASH *lh, unsigned long down_load);
+
+# ifndef OPENSSL_NO_STDIO
+# ifndef OPENSSL_NO_DEPRECATED_3_1
+OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_stats(const OPENSSL_LHASH *lh, FILE *fp);
+OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_stats(const OPENSSL_LHASH *lh, FILE *fp);
+OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_usage_stats(const OPENSSL_LHASH *lh, FILE *fp);
+# endif
+# endif
+# 
ifndef OPENSSL_NO_DEPRECATED_3_1 +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_usage_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +# endif + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define _LHASH OPENSSL_LHASH +# define LHASH_NODE OPENSSL_LH_NODE +# define lh_error OPENSSL_LH_error +# define lh_new OPENSSL_LH_new +# define lh_free OPENSSL_LH_free +# define lh_insert OPENSSL_LH_insert +# define lh_delete OPENSSL_LH_delete +# define lh_retrieve OPENSSL_LH_retrieve +# define lh_doall OPENSSL_LH_doall +# define lh_doall_arg OPENSSL_LH_doall_arg +# define lh_strhash OPENSSL_LH_strhash +# define lh_num_items OPENSSL_LH_num_items +# ifndef OPENSSL_NO_STDIO +# define lh_stats OPENSSL_LH_stats +# define lh_node_stats OPENSSL_LH_node_stats +# define lh_node_usage_stats OPENSSL_LH_node_usage_stats +# endif +# define lh_stats_bio OPENSSL_LH_stats_bio +# define lh_node_stats_bio OPENSSL_LH_node_stats_bio +# define lh_node_usage_stats_bio OPENSSL_LH_node_usage_stats_bio +# endif + +/* Type checking... */ + +# define LHASH_OF(type) struct lhash_st_##type + +/* Helper macro for internal use */ +# define DEFINE_LHASH_OF_INTERNAL(type) \ + LHASH_OF(type) { \ + union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; \ + }; \ + typedef int (*lh_##type##_compfunc)(const type *a, const type *b); \ + typedef unsigned long (*lh_##type##_hashfunc)(const type *a); \ + typedef void (*lh_##type##_doallfunc)(type *a); \ + static ossl_unused ossl_inline type *\ + ossl_check_##type##_lh_plain_type(type *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const type * \ + ossl_check_const_##type##_lh_plain_type(const type *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const OPENSSL_LHASH * \ + ossl_check_const_##type##_lh_type(const LHASH_OF(type) *lh) \ + { \ + return (const OPENSSL_LHASH *)lh; \ + } \ + static ossl_unused ossl_inline OPENSSL_LHASH * \ + ossl_check_##type##_lh_type(LHASH_OF(type) *lh) \ + { \ + return (OPENSSL_LHASH *)lh; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_COMPFUNC \ + ossl_check_##type##_lh_compfunc_type(lh_##type##_compfunc cmp) \ + { \ + return (OPENSSL_LH_COMPFUNC)cmp; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_HASHFUNC \ + ossl_check_##type##_lh_hashfunc_type(lh_##type##_hashfunc hfn) \ + { \ + return (OPENSSL_LH_HASHFUNC)hfn; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_DOALL_FUNC \ + ossl_check_##type##_lh_doallfunc_type(lh_##type##_doallfunc dfn) \ + { \ + return (OPENSSL_LH_DOALL_FUNC)dfn; \ + } \ + LHASH_OF(type) + +# ifndef OPENSSL_NO_DEPRECATED_3_1 +# define DEFINE_LHASH_OF_DEPRECATED(type) \ + static ossl_unused ossl_inline void \ + lh_##type##_node_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_node_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_node_usage_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_node_usage_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } +# else +# define DEFINE_LHASH_OF_DEPRECATED(type) +# endif + +# define DEFINE_LHASH_OF_EX(type) \ + LHASH_OF(type) { \ + union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; \ + }; \ + static 
ossl_unused ossl_inline LHASH_OF(type) * \ + lh_##type##_new(unsigned long (*hfn)(const type *), \ + int (*cfn)(const type *, const type *)) \ + { \ + return (LHASH_OF(type) *) \ + OPENSSL_LH_new((OPENSSL_LH_HASHFUNC)hfn, (OPENSSL_LH_COMPFUNC)cfn); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_free(LHASH_OF(type) *lh) \ + { \ + OPENSSL_LH_free((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_flush(LHASH_OF(type) *lh) \ + { \ + OPENSSL_LH_flush((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_insert(LHASH_OF(type) *lh, type *d) \ + { \ + return (type *)OPENSSL_LH_insert((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_delete(LHASH_OF(type) *lh, const type *d) \ + { \ + return (type *)OPENSSL_LH_delete((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_retrieve(LHASH_OF(type) *lh, const type *d) \ + { \ + return (type *)OPENSSL_LH_retrieve((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline int \ + lh_##type##_error(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_error((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline unsigned long \ + lh_##type##_num_items(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_num_items((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline unsigned long \ + lh_##type##_get_down_load(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_get_down_load((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_set_down_load(LHASH_OF(type) *lh, unsigned long dl) \ + { \ + OPENSSL_LH_set_down_load((OPENSSL_LHASH *)lh, dl); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall(LHASH_OF(type) *lh, void (*doall)(type *)) \ + { \ + OPENSSL_LH_doall((OPENSSL_LHASH *)lh, (OPENSSL_LH_DOALL_FUNC)doall); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_arg(LHASH_OF(type) *lh, \ + void (*doallarg)(type *, void *), void *arg) \ + { \ + OPENSSL_LH_doall_arg((OPENSSL_LHASH *)lh, \ + (OPENSSL_LH_DOALL_FUNCARG)doallarg, arg); \ + } \ + LHASH_OF(type) + +# define DEFINE_LHASH_OF(type) \ + DEFINE_LHASH_OF_EX(type); \ + DEFINE_LHASH_OF_DEPRECATED(type) \ + LHASH_OF(type) + +#define IMPLEMENT_LHASH_DOALL_ARG_CONST(type, argtype) \ + int_implement_lhash_doall(type, argtype, const type) + +#define IMPLEMENT_LHASH_DOALL_ARG(type, argtype) \ + int_implement_lhash_doall(type, argtype, type) + +#define int_implement_lhash_doall(type, argtype, cbargtype) \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_##argtype(LHASH_OF(type) *lh, \ + void (*fn)(cbargtype *, argtype *), \ + argtype *arg) \ + { \ + OPENSSL_LH_doall_arg((OPENSSL_LHASH *)lh, \ + (OPENSSL_LH_DOALL_FUNCARG)fn, (void *)arg); \ + } \ + LHASH_OF(type) + +DEFINE_LHASH_OF_INTERNAL(OPENSSL_STRING); +#define lh_OPENSSL_STRING_new(hfn, cmp) ((LHASH_OF(OPENSSL_STRING) *)OPENSSL_LH_new(ossl_check_OPENSSL_STRING_lh_hashfunc_type(hfn), ossl_check_OPENSSL_STRING_lh_compfunc_type(cmp))) +#define lh_OPENSSL_STRING_free(lh) OPENSSL_LH_free(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_flush(lh) OPENSSL_LH_flush(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_insert(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_insert(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_delete(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_delete(ossl_check_OPENSSL_STRING_lh_type(lh), 
ossl_check_const_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_retrieve(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_retrieve(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_const_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_error(lh) OPENSSL_LH_error(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_num_items(lh) OPENSSL_LH_num_items(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_OPENSSL_STRING_lh_type(lh), dl) +#define lh_OPENSSL_STRING_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_OPENSSL_STRING_lh_doallfunc_type(dfn)) +DEFINE_LHASH_OF_INTERNAL(OPENSSL_CSTRING); +#define lh_OPENSSL_CSTRING_new(hfn, cmp) ((LHASH_OF(OPENSSL_CSTRING) *)OPENSSL_LH_new(ossl_check_OPENSSL_CSTRING_lh_hashfunc_type(hfn), ossl_check_OPENSSL_CSTRING_lh_compfunc_type(cmp))) +#define lh_OPENSSL_CSTRING_free(lh) OPENSSL_LH_free(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_flush(lh) OPENSSL_LH_flush(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_insert(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_insert(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_delete(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_delete(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_const_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_retrieve(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_retrieve(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_const_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_error(lh) OPENSSL_LH_error(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_num_items(lh) OPENSSL_LH_num_items(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_OPENSSL_CSTRING_lh_type(lh), dl) +#define lh_OPENSSL_CSTRING_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_OPENSSL_CSTRING_lh_doallfunc_type(dfn)) + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ocsp.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ocsp.h new file mode 100644 index 00000000000..142b183140b --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ocsp.h @@ -0,0 +1,483 @@ +/* + * WARNING: do not edit! 
+ * Generated by Makefile from include/openssl/ocsp.h.in + * + * Copyright 2000-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_OCSP_H +# define OPENSSL_OCSP_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_OCSP_H +# endif + +# include +# include +# include + +/* + * These definitions are outside the OPENSSL_NO_OCSP guard because although for + * historical reasons they have OCSP_* names, they can actually be used + * independently of OCSP. E.g. see RFC5280 + */ +/*- + * CRLReason ::= ENUMERATED { + * unspecified (0), + * keyCompromise (1), + * cACompromise (2), + * affiliationChanged (3), + * superseded (4), + * cessationOfOperation (5), + * certificateHold (6), + * -- value 7 is not used + * removeFromCRL (8), + * privilegeWithdrawn (9), + * aACompromise (10) } + */ +# define OCSP_REVOKED_STATUS_NOSTATUS -1 +# define OCSP_REVOKED_STATUS_UNSPECIFIED 0 +# define OCSP_REVOKED_STATUS_KEYCOMPROMISE 1 +# define OCSP_REVOKED_STATUS_CACOMPROMISE 2 +# define OCSP_REVOKED_STATUS_AFFILIATIONCHANGED 3 +# define OCSP_REVOKED_STATUS_SUPERSEDED 4 +# define OCSP_REVOKED_STATUS_CESSATIONOFOPERATION 5 +# define OCSP_REVOKED_STATUS_CERTIFICATEHOLD 6 +# define OCSP_REVOKED_STATUS_REMOVEFROMCRL 8 +# define OCSP_REVOKED_STATUS_PRIVILEGEWITHDRAWN 9 +# define OCSP_REVOKED_STATUS_AACOMPROMISE 10 + + +# ifndef OPENSSL_NO_OCSP + +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +/* Various flags and values */ + +# define OCSP_DEFAULT_NONCE_LENGTH 16 + +# define OCSP_NOCERTS 0x1 +# define OCSP_NOINTERN 0x2 +# define OCSP_NOSIGS 0x4 +# define OCSP_NOCHAIN 0x8 +# define OCSP_NOVERIFY 0x10 +# define OCSP_NOEXPLICIT 0x20 +# define OCSP_NOCASIGN 0x40 +# define OCSP_NODELEGATED 0x80 +# define OCSP_NOCHECKS 0x100 +# define OCSP_TRUSTOTHER 0x200 +# define OCSP_RESPID_KEY 0x400 +# define OCSP_NOTIME 0x800 +# define OCSP_PARTIAL_CHAIN 0x1000 + +typedef struct ocsp_cert_id_st OCSP_CERTID; +typedef struct ocsp_one_request_st OCSP_ONEREQ; +typedef struct ocsp_req_info_st OCSP_REQINFO; +typedef struct ocsp_signature_st OCSP_SIGNATURE; +typedef struct ocsp_request_st OCSP_REQUEST; + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_CERTID, OCSP_CERTID, OCSP_CERTID) +#define sk_OCSP_CERTID_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_value(sk, idx) ((OCSP_CERTID *)OPENSSL_sk_value(ossl_check_const_OCSP_CERTID_sk_type(sk), (idx))) +#define sk_OCSP_CERTID_new(cmp) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new(ossl_check_OCSP_CERTID_compfunc_type(cmp))) +#define sk_OCSP_CERTID_new_null() ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new_null()) +#define sk_OCSP_CERTID_new_reserve(cmp, n) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_CERTID_compfunc_type(cmp), (n))) +#define sk_OCSP_CERTID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_CERTID_sk_type(sk), (n)) +#define sk_OCSP_CERTID_free(sk) OPENSSL_sk_free(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_delete(sk, i) ((OCSP_CERTID *)OPENSSL_sk_delete(ossl_check_OCSP_CERTID_sk_type(sk), (i))) +#define sk_OCSP_CERTID_delete_ptr(sk, ptr) ((OCSP_CERTID 
*)OPENSSL_sk_delete_ptr(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr))) +#define sk_OCSP_CERTID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_pop(sk) ((OCSP_CERTID *)OPENSSL_sk_pop(ossl_check_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_shift(sk) ((OCSP_CERTID *)OPENSSL_sk_shift(ossl_check_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_CERTID_sk_type(sk),ossl_check_OCSP_CERTID_freefunc_type(freefunc)) +#define sk_OCSP_CERTID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr), (idx)) +#define sk_OCSP_CERTID_set(sk, idx, ptr) ((OCSP_CERTID *)OPENSSL_sk_set(ossl_check_OCSP_CERTID_sk_type(sk), (idx), ossl_check_OCSP_CERTID_type(ptr))) +#define sk_OCSP_CERTID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr), pnum) +#define sk_OCSP_CERTID_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_dup(sk) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_dup(ossl_check_const_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_copyfunc_type(copyfunc), ossl_check_OCSP_CERTID_freefunc_type(freefunc))) +#define sk_OCSP_CERTID_set_cmp_func(sk, cmp) ((sk_OCSP_CERTID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_ONEREQ, OCSP_ONEREQ, OCSP_ONEREQ) +#define sk_OCSP_ONEREQ_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_value(sk, idx) ((OCSP_ONEREQ *)OPENSSL_sk_value(ossl_check_const_OCSP_ONEREQ_sk_type(sk), (idx))) +#define sk_OCSP_ONEREQ_new(cmp) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new(ossl_check_OCSP_ONEREQ_compfunc_type(cmp))) +#define sk_OCSP_ONEREQ_new_null() ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new_null()) +#define sk_OCSP_ONEREQ_new_reserve(cmp, n) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_ONEREQ_compfunc_type(cmp), (n))) +#define sk_OCSP_ONEREQ_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_ONEREQ_sk_type(sk), (n)) +#define sk_OCSP_ONEREQ_free(sk) OPENSSL_sk_free(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_delete(sk, i) ((OCSP_ONEREQ *)OPENSSL_sk_delete(ossl_check_OCSP_ONEREQ_sk_type(sk), (i))) +#define sk_OCSP_ONEREQ_delete_ptr(sk, ptr) ((OCSP_ONEREQ *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr))) +#define sk_OCSP_ONEREQ_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define 
sk_OCSP_ONEREQ_pop(sk) ((OCSP_ONEREQ *)OPENSSL_sk_pop(ossl_check_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_shift(sk) ((OCSP_ONEREQ *)OPENSSL_sk_shift(ossl_check_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_ONEREQ_sk_type(sk),ossl_check_OCSP_ONEREQ_freefunc_type(freefunc)) +#define sk_OCSP_ONEREQ_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr), (idx)) +#define sk_OCSP_ONEREQ_set(sk, idx, ptr) ((OCSP_ONEREQ *)OPENSSL_sk_set(ossl_check_OCSP_ONEREQ_sk_type(sk), (idx), ossl_check_OCSP_ONEREQ_type(ptr))) +#define sk_OCSP_ONEREQ_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr), pnum) +#define sk_OCSP_ONEREQ_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_dup(sk) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_dup(ossl_check_const_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_copyfunc_type(copyfunc), ossl_check_OCSP_ONEREQ_freefunc_type(freefunc))) +#define sk_OCSP_ONEREQ_set_cmp_func(sk, cmp) ((sk_OCSP_ONEREQ_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_compfunc_type(cmp))) + + +# define OCSP_RESPONSE_STATUS_SUCCESSFUL 0 +# define OCSP_RESPONSE_STATUS_MALFORMEDREQUEST 1 +# define OCSP_RESPONSE_STATUS_INTERNALERROR 2 +# define OCSP_RESPONSE_STATUS_TRYLATER 3 +# define OCSP_RESPONSE_STATUS_SIGREQUIRED 5 +# define OCSP_RESPONSE_STATUS_UNAUTHORIZED 6 + +typedef struct ocsp_resp_bytes_st OCSP_RESPBYTES; + +# define V_OCSP_RESPID_NAME 0 +# define V_OCSP_RESPID_KEY 1 + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_RESPID, OCSP_RESPID, OCSP_RESPID) +#define sk_OCSP_RESPID_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_value(sk, idx) ((OCSP_RESPID *)OPENSSL_sk_value(ossl_check_const_OCSP_RESPID_sk_type(sk), (idx))) +#define sk_OCSP_RESPID_new(cmp) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new(ossl_check_OCSP_RESPID_compfunc_type(cmp))) +#define sk_OCSP_RESPID_new_null() ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new_null()) +#define sk_OCSP_RESPID_new_reserve(cmp, n) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_RESPID_compfunc_type(cmp), (n))) +#define sk_OCSP_RESPID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_RESPID_sk_type(sk), (n)) +#define sk_OCSP_RESPID_free(sk) OPENSSL_sk_free(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_delete(sk, i) ((OCSP_RESPID *)OPENSSL_sk_delete(ossl_check_OCSP_RESPID_sk_type(sk), (i))) +#define sk_OCSP_RESPID_delete_ptr(sk, ptr) ((OCSP_RESPID *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr))) +#define sk_OCSP_RESPID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_RESPID_sk_type(sk), 
ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_pop(sk) ((OCSP_RESPID *)OPENSSL_sk_pop(ossl_check_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_shift(sk) ((OCSP_RESPID *)OPENSSL_sk_shift(ossl_check_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_RESPID_sk_type(sk),ossl_check_OCSP_RESPID_freefunc_type(freefunc)) +#define sk_OCSP_RESPID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr), (idx)) +#define sk_OCSP_RESPID_set(sk, idx, ptr) ((OCSP_RESPID *)OPENSSL_sk_set(ossl_check_OCSP_RESPID_sk_type(sk), (idx), ossl_check_OCSP_RESPID_type(ptr))) +#define sk_OCSP_RESPID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr), pnum) +#define sk_OCSP_RESPID_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_dup(sk) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_dup(ossl_check_const_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_copyfunc_type(copyfunc), ossl_check_OCSP_RESPID_freefunc_type(freefunc))) +#define sk_OCSP_RESPID_set_cmp_func(sk, cmp) ((sk_OCSP_RESPID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_compfunc_type(cmp))) + + +typedef struct ocsp_revoked_info_st OCSP_REVOKEDINFO; + +# define V_OCSP_CERTSTATUS_GOOD 0 +# define V_OCSP_CERTSTATUS_REVOKED 1 +# define V_OCSP_CERTSTATUS_UNKNOWN 2 + +typedef struct ocsp_cert_status_st OCSP_CERTSTATUS; +typedef struct ocsp_single_response_st OCSP_SINGLERESP; + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_SINGLERESP, OCSP_SINGLERESP, OCSP_SINGLERESP) +#define sk_OCSP_SINGLERESP_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_value(sk, idx) ((OCSP_SINGLERESP *)OPENSSL_sk_value(ossl_check_const_OCSP_SINGLERESP_sk_type(sk), (idx))) +#define sk_OCSP_SINGLERESP_new(cmp) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new(ossl_check_OCSP_SINGLERESP_compfunc_type(cmp))) +#define sk_OCSP_SINGLERESP_new_null() ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new_null()) +#define sk_OCSP_SINGLERESP_new_reserve(cmp, n) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_SINGLERESP_compfunc_type(cmp), (n))) +#define sk_OCSP_SINGLERESP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_SINGLERESP_sk_type(sk), (n)) +#define sk_OCSP_SINGLERESP_free(sk) OPENSSL_sk_free(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_delete(sk, i) ((OCSP_SINGLERESP *)OPENSSL_sk_delete(ossl_check_OCSP_SINGLERESP_sk_type(sk), (i))) +#define sk_OCSP_SINGLERESP_delete_ptr(sk, ptr) ((OCSP_SINGLERESP *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr))) +#define sk_OCSP_SINGLERESP_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_unshift(sk, ptr) 
OPENSSL_sk_unshift(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_pop(sk) ((OCSP_SINGLERESP *)OPENSSL_sk_pop(ossl_check_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_shift(sk) ((OCSP_SINGLERESP *)OPENSSL_sk_shift(ossl_check_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_SINGLERESP_sk_type(sk),ossl_check_OCSP_SINGLERESP_freefunc_type(freefunc)) +#define sk_OCSP_SINGLERESP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr), (idx)) +#define sk_OCSP_SINGLERESP_set(sk, idx, ptr) ((OCSP_SINGLERESP *)OPENSSL_sk_set(ossl_check_OCSP_SINGLERESP_sk_type(sk), (idx), ossl_check_OCSP_SINGLERESP_type(ptr))) +#define sk_OCSP_SINGLERESP_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr), pnum) +#define sk_OCSP_SINGLERESP_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_dup(sk) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_dup(ossl_check_const_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_copyfunc_type(copyfunc), ossl_check_OCSP_SINGLERESP_freefunc_type(freefunc))) +#define sk_OCSP_SINGLERESP_set_cmp_func(sk, cmp) ((sk_OCSP_SINGLERESP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_compfunc_type(cmp))) + + +typedef struct ocsp_response_data_st OCSP_RESPDATA; + +typedef struct ocsp_basic_response_st OCSP_BASICRESP; + +typedef struct ocsp_crl_id_st OCSP_CRLID; +typedef struct ocsp_service_locator_st OCSP_SERVICELOC; + +# define PEM_STRING_OCSP_REQUEST "OCSP REQUEST" +# define PEM_STRING_OCSP_RESPONSE "OCSP RESPONSE" + +# define d2i_OCSP_REQUEST_bio(bp,p) ASN1_d2i_bio_of(OCSP_REQUEST,OCSP_REQUEST_new,d2i_OCSP_REQUEST,bp,p) + +# define d2i_OCSP_RESPONSE_bio(bp,p) ASN1_d2i_bio_of(OCSP_RESPONSE,OCSP_RESPONSE_new,d2i_OCSP_RESPONSE,bp,p) + +# define PEM_read_bio_OCSP_REQUEST(bp,x,cb) (OCSP_REQUEST *)PEM_ASN1_read_bio( \ + (char *(*)())d2i_OCSP_REQUEST,PEM_STRING_OCSP_REQUEST, \ + bp,(char **)(x),cb,NULL) + +# define PEM_read_bio_OCSP_RESPONSE(bp,x,cb) (OCSP_RESPONSE *)PEM_ASN1_read_bio(\ + (char *(*)())d2i_OCSP_RESPONSE,PEM_STRING_OCSP_RESPONSE, \ + bp,(char **)(x),cb,NULL) + +# define PEM_write_bio_OCSP_REQUEST(bp,o) \ + PEM_ASN1_write_bio((int (*)())i2d_OCSP_REQUEST,PEM_STRING_OCSP_REQUEST,\ + bp,(char *)(o), NULL,NULL,0,NULL,NULL) + +# define PEM_write_bio_OCSP_RESPONSE(bp,o) \ + PEM_ASN1_write_bio((int (*)())i2d_OCSP_RESPONSE,PEM_STRING_OCSP_RESPONSE,\ + bp,(char *)(o), NULL,NULL,0,NULL,NULL) + +# define i2d_OCSP_RESPONSE_bio(bp,o) ASN1_i2d_bio_of(OCSP_RESPONSE,i2d_OCSP_RESPONSE,bp,o) + +# define i2d_OCSP_REQUEST_bio(bp,o) ASN1_i2d_bio_of(OCSP_REQUEST,i2d_OCSP_REQUEST,bp,o) + +# define ASN1_BIT_STRING_digest(data,type,md,len) \ + ASN1_item_digest(ASN1_ITEM_rptr(ASN1_BIT_STRING),type,data,md,len) + +# 
define OCSP_CERTSTATUS_dup(cs)\ + (OCSP_CERTSTATUS*)ASN1_dup((i2d_of_void *)i2d_OCSP_CERTSTATUS,\ + (d2i_of_void *)d2i_OCSP_CERTSTATUS,(char *)(cs)) + +DECLARE_ASN1_DUP_FUNCTION(OCSP_CERTID) + +OSSL_HTTP_REQ_CTX *OCSP_sendreq_new(BIO *io, const char *path, + const OCSP_REQUEST *req, int buf_size); +OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, const char *path, OCSP_REQUEST *req); + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +typedef OSSL_HTTP_REQ_CTX OCSP_REQ_CTX; +# define OCSP_REQ_CTX_new(io, buf_size) \ + OSSL_HTTP_REQ_CTX_new(io, io, buf_size) +# define OCSP_REQ_CTX_free OSSL_HTTP_REQ_CTX_free +# define OCSP_REQ_CTX_http(rctx, op, path) \ + (OSSL_HTTP_REQ_CTX_set_expected(rctx, NULL, 1 /* asn1 */, 0, 0) && \ + OSSL_HTTP_REQ_CTX_set_request_line(rctx, strcmp(op, "POST") == 0, \ + NULL, NULL, path)) +# define OCSP_REQ_CTX_add1_header OSSL_HTTP_REQ_CTX_add1_header +# define OCSP_REQ_CTX_i2d(r, it, req) \ + OSSL_HTTP_REQ_CTX_set1_req(r, "application/ocsp-request", it, req) +# define OCSP_REQ_CTX_set1_req(r, req) \ + OCSP_REQ_CTX_i2d(r, ASN1_ITEM_rptr(OCSP_REQUEST), (ASN1_VALUE *)(req)) +# define OCSP_REQ_CTX_nbio OSSL_HTTP_REQ_CTX_nbio +# define OCSP_REQ_CTX_nbio_d2i OSSL_HTTP_REQ_CTX_nbio_d2i +# define OCSP_sendreq_nbio(p, r) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(r, (ASN1_VALUE **)(p), \ + ASN1_ITEM_rptr(OCSP_RESPONSE)) +# define OCSP_REQ_CTX_get0_mem_bio OSSL_HTTP_REQ_CTX_get0_mem_bio +# define OCSP_set_max_response_length OSSL_HTTP_REQ_CTX_set_max_response_length +# endif + +OCSP_CERTID *OCSP_cert_to_id(const EVP_MD *dgst, const X509 *subject, + const X509 *issuer); + +OCSP_CERTID *OCSP_cert_id_new(const EVP_MD *dgst, + const X509_NAME *issuerName, + const ASN1_BIT_STRING *issuerKey, + const ASN1_INTEGER *serialNumber); + +OCSP_ONEREQ *OCSP_request_add0_id(OCSP_REQUEST *req, OCSP_CERTID *cid); + +int OCSP_request_add1_nonce(OCSP_REQUEST *req, unsigned char *val, int len); +int OCSP_basic_add1_nonce(OCSP_BASICRESP *resp, unsigned char *val, int len); +int OCSP_check_nonce(OCSP_REQUEST *req, OCSP_BASICRESP *bs); +int OCSP_copy_nonce(OCSP_BASICRESP *resp, OCSP_REQUEST *req); + +int OCSP_request_set1_name(OCSP_REQUEST *req, const X509_NAME *nm); +int OCSP_request_add1_cert(OCSP_REQUEST *req, X509 *cert); + +int OCSP_request_sign(OCSP_REQUEST *req, + X509 *signer, + EVP_PKEY *key, + const EVP_MD *dgst, + STACK_OF(X509) *certs, unsigned long flags); + +int OCSP_response_status(OCSP_RESPONSE *resp); +OCSP_BASICRESP *OCSP_response_get1_basic(OCSP_RESPONSE *resp); + +const ASN1_OCTET_STRING *OCSP_resp_get0_signature(const OCSP_BASICRESP *bs); +const X509_ALGOR *OCSP_resp_get0_tbs_sigalg(const OCSP_BASICRESP *bs); +const OCSP_RESPDATA *OCSP_resp_get0_respdata(const OCSP_BASICRESP *bs); +int OCSP_resp_get0_signer(OCSP_BASICRESP *bs, X509 **signer, + STACK_OF(X509) *extra_certs); + +int OCSP_resp_count(OCSP_BASICRESP *bs); +OCSP_SINGLERESP *OCSP_resp_get0(OCSP_BASICRESP *bs, int idx); +const ASN1_GENERALIZEDTIME *OCSP_resp_get0_produced_at(const OCSP_BASICRESP* bs); +const STACK_OF(X509) *OCSP_resp_get0_certs(const OCSP_BASICRESP *bs); +int OCSP_resp_get0_id(const OCSP_BASICRESP *bs, + const ASN1_OCTET_STRING **pid, + const X509_NAME **pname); +int OCSP_resp_get1_id(const OCSP_BASICRESP *bs, + ASN1_OCTET_STRING **pid, + X509_NAME **pname); + +int OCSP_resp_find(OCSP_BASICRESP *bs, OCSP_CERTID *id, int last); +int OCSP_single_get0_status(OCSP_SINGLERESP *single, int *reason, + ASN1_GENERALIZEDTIME **revtime, + ASN1_GENERALIZEDTIME **thisupd, + ASN1_GENERALIZEDTIME **nextupd); +int OCSP_resp_find_status(OCSP_BASICRESP 
*bs, OCSP_CERTID *id, int *status, + int *reason, + ASN1_GENERALIZEDTIME **revtime, + ASN1_GENERALIZEDTIME **thisupd, + ASN1_GENERALIZEDTIME **nextupd); +int OCSP_check_validity(ASN1_GENERALIZEDTIME *thisupd, + ASN1_GENERALIZEDTIME *nextupd, long sec, long maxsec); + +int OCSP_request_verify(OCSP_REQUEST *req, STACK_OF(X509) *certs, + X509_STORE *store, unsigned long flags); + +# define OCSP_parse_url(url, host, port, path, ssl) \ + OSSL_HTTP_parse_url(url, ssl, NULL, host, port, NULL, path, NULL, NULL) + +int OCSP_id_issuer_cmp(const OCSP_CERTID *a, const OCSP_CERTID *b); +int OCSP_id_cmp(const OCSP_CERTID *a, const OCSP_CERTID *b); + +int OCSP_request_onereq_count(OCSP_REQUEST *req); +OCSP_ONEREQ *OCSP_request_onereq_get0(OCSP_REQUEST *req, int i); +OCSP_CERTID *OCSP_onereq_get0_id(OCSP_ONEREQ *one); +int OCSP_id_get0_info(ASN1_OCTET_STRING **piNameHash, ASN1_OBJECT **pmd, + ASN1_OCTET_STRING **pikeyHash, + ASN1_INTEGER **pserial, OCSP_CERTID *cid); +int OCSP_request_is_signed(OCSP_REQUEST *req); +OCSP_RESPONSE *OCSP_response_create(int status, OCSP_BASICRESP *bs); +OCSP_SINGLERESP *OCSP_basic_add1_status(OCSP_BASICRESP *rsp, + OCSP_CERTID *cid, + int status, int reason, + ASN1_TIME *revtime, + ASN1_TIME *thisupd, + ASN1_TIME *nextupd); +int OCSP_basic_add1_cert(OCSP_BASICRESP *resp, X509 *cert); +int OCSP_basic_sign(OCSP_BASICRESP *brsp, + X509 *signer, EVP_PKEY *key, const EVP_MD *dgst, + STACK_OF(X509) *certs, unsigned long flags); +int OCSP_basic_sign_ctx(OCSP_BASICRESP *brsp, + X509 *signer, EVP_MD_CTX *ctx, + STACK_OF(X509) *certs, unsigned long flags); +int OCSP_RESPID_set_by_name(OCSP_RESPID *respid, X509 *cert); +int OCSP_RESPID_set_by_key_ex(OCSP_RESPID *respid, X509 *cert, + OSSL_LIB_CTX *libctx, const char *propq); +int OCSP_RESPID_set_by_key(OCSP_RESPID *respid, X509 *cert); +int OCSP_RESPID_match_ex(OCSP_RESPID *respid, X509 *cert, OSSL_LIB_CTX *libctx, + const char *propq); +int OCSP_RESPID_match(OCSP_RESPID *respid, X509 *cert); + +X509_EXTENSION *OCSP_crlID_new(const char *url, long *n, char *tim); + +X509_EXTENSION *OCSP_accept_responses_new(char **oids); + +X509_EXTENSION *OCSP_archive_cutoff_new(char *tim); + +X509_EXTENSION *OCSP_url_svcloc_new(const X509_NAME *issuer, const char **urls); + +int OCSP_REQUEST_get_ext_count(OCSP_REQUEST *x); +int OCSP_REQUEST_get_ext_by_NID(OCSP_REQUEST *x, int nid, int lastpos); +int OCSP_REQUEST_get_ext_by_OBJ(OCSP_REQUEST *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_REQUEST_get_ext_by_critical(OCSP_REQUEST *x, int crit, int lastpos); +X509_EXTENSION *OCSP_REQUEST_get_ext(OCSP_REQUEST *x, int loc); +X509_EXTENSION *OCSP_REQUEST_delete_ext(OCSP_REQUEST *x, int loc); +void *OCSP_REQUEST_get1_ext_d2i(OCSP_REQUEST *x, int nid, int *crit, + int *idx); +int OCSP_REQUEST_add1_ext_i2d(OCSP_REQUEST *x, int nid, void *value, int crit, + unsigned long flags); +int OCSP_REQUEST_add_ext(OCSP_REQUEST *x, X509_EXTENSION *ex, int loc); + +int OCSP_ONEREQ_get_ext_count(OCSP_ONEREQ *x); +int OCSP_ONEREQ_get_ext_by_NID(OCSP_ONEREQ *x, int nid, int lastpos); +int OCSP_ONEREQ_get_ext_by_OBJ(OCSP_ONEREQ *x, const ASN1_OBJECT *obj, int lastpos); +int OCSP_ONEREQ_get_ext_by_critical(OCSP_ONEREQ *x, int crit, int lastpos); +X509_EXTENSION *OCSP_ONEREQ_get_ext(OCSP_ONEREQ *x, int loc); +X509_EXTENSION *OCSP_ONEREQ_delete_ext(OCSP_ONEREQ *x, int loc); +void *OCSP_ONEREQ_get1_ext_d2i(OCSP_ONEREQ *x, int nid, int *crit, int *idx); +int OCSP_ONEREQ_add1_ext_i2d(OCSP_ONEREQ *x, int nid, void *value, int crit, + unsigned long flags); +int 
OCSP_ONEREQ_add_ext(OCSP_ONEREQ *x, X509_EXTENSION *ex, int loc); + +int OCSP_BASICRESP_get_ext_count(OCSP_BASICRESP *x); +int OCSP_BASICRESP_get_ext_by_NID(OCSP_BASICRESP *x, int nid, int lastpos); +int OCSP_BASICRESP_get_ext_by_OBJ(OCSP_BASICRESP *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_BASICRESP_get_ext_by_critical(OCSP_BASICRESP *x, int crit, + int lastpos); +X509_EXTENSION *OCSP_BASICRESP_get_ext(OCSP_BASICRESP *x, int loc); +X509_EXTENSION *OCSP_BASICRESP_delete_ext(OCSP_BASICRESP *x, int loc); +void *OCSP_BASICRESP_get1_ext_d2i(OCSP_BASICRESP *x, int nid, int *crit, + int *idx); +int OCSP_BASICRESP_add1_ext_i2d(OCSP_BASICRESP *x, int nid, void *value, + int crit, unsigned long flags); +int OCSP_BASICRESP_add_ext(OCSP_BASICRESP *x, X509_EXTENSION *ex, int loc); + +int OCSP_SINGLERESP_get_ext_count(OCSP_SINGLERESP *x); +int OCSP_SINGLERESP_get_ext_by_NID(OCSP_SINGLERESP *x, int nid, int lastpos); +int OCSP_SINGLERESP_get_ext_by_OBJ(OCSP_SINGLERESP *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_SINGLERESP_get_ext_by_critical(OCSP_SINGLERESP *x, int crit, + int lastpos); +X509_EXTENSION *OCSP_SINGLERESP_get_ext(OCSP_SINGLERESP *x, int loc); +X509_EXTENSION *OCSP_SINGLERESP_delete_ext(OCSP_SINGLERESP *x, int loc); +void *OCSP_SINGLERESP_get1_ext_d2i(OCSP_SINGLERESP *x, int nid, int *crit, + int *idx); +int OCSP_SINGLERESP_add1_ext_i2d(OCSP_SINGLERESP *x, int nid, void *value, + int crit, unsigned long flags); +int OCSP_SINGLERESP_add_ext(OCSP_SINGLERESP *x, X509_EXTENSION *ex, int loc); +const OCSP_CERTID *OCSP_SINGLERESP_get0_id(const OCSP_SINGLERESP *x); + +DECLARE_ASN1_FUNCTIONS(OCSP_SINGLERESP) +DECLARE_ASN1_FUNCTIONS(OCSP_CERTSTATUS) +DECLARE_ASN1_FUNCTIONS(OCSP_REVOKEDINFO) +DECLARE_ASN1_FUNCTIONS(OCSP_BASICRESP) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPDATA) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPID) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPONSE) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPBYTES) +DECLARE_ASN1_FUNCTIONS(OCSP_ONEREQ) +DECLARE_ASN1_FUNCTIONS(OCSP_CERTID) +DECLARE_ASN1_FUNCTIONS(OCSP_REQUEST) +DECLARE_ASN1_FUNCTIONS(OCSP_SIGNATURE) +DECLARE_ASN1_FUNCTIONS(OCSP_REQINFO) +DECLARE_ASN1_FUNCTIONS(OCSP_CRLID) +DECLARE_ASN1_FUNCTIONS(OCSP_SERVICELOC) + +const char *OCSP_response_status_str(long s); +const char *OCSP_cert_status_str(long s); +const char *OCSP_crl_reason_str(long s); + +int OCSP_REQUEST_print(BIO *bp, OCSP_REQUEST *a, unsigned long flags); +int OCSP_RESPONSE_print(BIO *bp, OCSP_RESPONSE *o, unsigned long flags); + +int OCSP_basic_verify(OCSP_BASICRESP *bs, STACK_OF(X509) *certs, + X509_STORE *st, unsigned long flags); + + +# ifdef __cplusplus +} +# endif +# endif /* !defined(OPENSSL_NO_OCSP) */ +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h new file mode 100644 index 00000000000..b38d64da593 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h @@ -0,0 +1,114 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/opensslv.h.in + * + * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_OPENSSLV_H +# define OPENSSL_OPENSSLV_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * SECTION 1: VERSION DATA. These will change for each release + */ + +/* + * Base version macros + * + * These macros express version number MAJOR.MINOR.PATCH exactly + */ +# define OPENSSL_VERSION_MAJOR 3 +# define OPENSSL_VERSION_MINOR 2 +# define OPENSSL_VERSION_PATCH 1 + +/* + * Additional version information + * + * These are also part of the new version scheme, but aren't part + * of the version number itself. + */ + +/* Could be: #define OPENSSL_VERSION_PRE_RELEASE "-alpha.1" */ +# define OPENSSL_VERSION_PRE_RELEASE "" +/* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+fips" */ +/* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+vendor.1" */ +# define OPENSSL_VERSION_BUILD_METADATA "" + +/* + * Note: The OpenSSL Project will never define OPENSSL_VERSION_BUILD_METADATA + * to be anything but the empty string. Its use is entirely reserved for + * others + */ + +/* + * Shared library version + * + * This is strictly to express ABI version, which may or may not + * be related to the API version expressed with the macros above. + * This is defined in free form. + */ +# define OPENSSL_SHLIB_VERSION 3 + +/* + * SECTION 2: USEFUL MACROS + */ + +/* For checking general API compatibility when preprocessing */ +# define OPENSSL_VERSION_PREREQ(maj,min) \ + ((OPENSSL_VERSION_MAJOR << 16) + OPENSSL_VERSION_MINOR >= ((maj) << 16) + (min)) + +/* + * Macros to get the version in easily digested string form, both the short + * "MAJOR.MINOR.PATCH" variant (where MAJOR, MINOR and PATCH are replaced + * with the values from the corresponding OPENSSL_VERSION_ macros) and the + * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and + * OPENSSL_VERSION_BUILD_METADATA_STR appended. + */ +# define OPENSSL_VERSION_STR "3.2.1" +# define OPENSSL_FULL_VERSION_STR "3.2.1" + +/* + * SECTION 3: ADDITIONAL METADATA + * + * These strings are defined separately to allow them to be parsable. + */ +# define OPENSSL_RELEASE_DATE "30 Jan 2024" + +/* + * SECTION 4: BACKWARD COMPATIBILITY + */ + +# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" + +/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ +# ifdef OPENSSL_VERSION_PRE_RELEASE +# define _OPENSSL_VERSION_PRE_RELEASE 0x0L +# else +# define _OPENSSL_VERSION_PRE_RELEASE 0xfL +# endif +# define OPENSSL_VERSION_NUMBER \ + ( (OPENSSL_VERSION_MAJOR<<28) \ + |(OPENSSL_VERSION_MINOR<<20) \ + |(OPENSSL_VERSION_PATCH<<4) \ + |_OPENSSL_VERSION_PRE_RELEASE ) + +# ifdef __cplusplus +} +# endif + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_OPENSSLV_H +# endif + +#endif /* OPENSSL_OPENSSLV_H */ diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs12.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs12.h new file mode 100644 index 00000000000..b08b0bc214c --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs12.h @@ -0,0 +1,363 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/pkcs12.h.in + * + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_PKCS12_H +# define OPENSSL_PKCS12_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_PKCS12_H +# endif + +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define PKCS12_KEY_ID 1 +# define PKCS12_IV_ID 2 +# define PKCS12_MAC_ID 3 + +/* Default iteration count */ +# ifndef PKCS12_DEFAULT_ITER +# define PKCS12_DEFAULT_ITER PKCS5_DEFAULT_ITER +# endif + +# define PKCS12_MAC_KEY_LENGTH 20 + +/* The macro is expected to be used only internally. Kept for backwards compatibility. */ +# define PKCS12_SALT_LEN 8 + +/* It's not clear if these are actually needed... */ +# define PKCS12_key_gen PKCS12_key_gen_utf8 +# define PKCS12_add_friendlyname PKCS12_add_friendlyname_utf8 + +/* MS key usage constants */ + +# define KEY_EX 0x10 +# define KEY_SIG 0x80 + +typedef struct PKCS12_MAC_DATA_st PKCS12_MAC_DATA; + +typedef struct PKCS12_st PKCS12; + +typedef struct PKCS12_SAFEBAG_st PKCS12_SAFEBAG; + +SKM_DEFINE_STACK_OF_INTERNAL(PKCS12_SAFEBAG, PKCS12_SAFEBAG, PKCS12_SAFEBAG) +#define sk_PKCS12_SAFEBAG_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_value(sk, idx) ((PKCS12_SAFEBAG *)OPENSSL_sk_value(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk), (idx))) +#define sk_PKCS12_SAFEBAG_new(cmp) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new(ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp))) +#define sk_PKCS12_SAFEBAG_new_null() ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new_null()) +#define sk_PKCS12_SAFEBAG_new_reserve(cmp, n) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new_reserve(ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp), (n))) +#define sk_PKCS12_SAFEBAG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (n)) +#define sk_PKCS12_SAFEBAG_free(sk) OPENSSL_sk_free(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_delete(sk, i) ((PKCS12_SAFEBAG *)OPENSSL_sk_delete(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (i))) +#define sk_PKCS12_SAFEBAG_delete_ptr(sk, ptr) ((PKCS12_SAFEBAG *)OPENSSL_sk_delete_ptr(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr))) +#define sk_PKCS12_SAFEBAG_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_pop(sk) ((PKCS12_SAFEBAG *)OPENSSL_sk_pop(ossl_check_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_shift(sk) ((PKCS12_SAFEBAG *)OPENSSL_sk_shift(ossl_check_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_PKCS12_SAFEBAG_sk_type(sk),ossl_check_PKCS12_SAFEBAG_freefunc_type(freefunc)) +#define sk_PKCS12_SAFEBAG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr), (idx)) +#define sk_PKCS12_SAFEBAG_set(sk, idx, ptr) ((PKCS12_SAFEBAG *)OPENSSL_sk_set(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (idx), ossl_check_PKCS12_SAFEBAG_type(ptr))) +#define sk_PKCS12_SAFEBAG_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define 
sk_PKCS12_SAFEBAG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr), pnum) +#define sk_PKCS12_SAFEBAG_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_dup(sk) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_dup(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_copyfunc_type(copyfunc), ossl_check_PKCS12_SAFEBAG_freefunc_type(freefunc))) +#define sk_PKCS12_SAFEBAG_set_cmp_func(sk, cmp) ((sk_PKCS12_SAFEBAG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp))) + + +typedef struct pkcs12_bag_st PKCS12_BAGS; + +# define PKCS12_ERROR 0 +# define PKCS12_OK 1 + +/* Compatibility macros */ + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 + +# define M_PKCS12_bag_type PKCS12_bag_type +# define M_PKCS12_cert_bag_type PKCS12_cert_bag_type +# define M_PKCS12_crl_bag_type PKCS12_cert_bag_type + +# define PKCS12_certbag2x509 PKCS12_SAFEBAG_get1_cert +# define PKCS12_certbag2scrl PKCS12_SAFEBAG_get1_crl +# define PKCS12_bag_type PKCS12_SAFEBAG_get_nid +# define PKCS12_cert_bag_type PKCS12_SAFEBAG_get_bag_nid +# define PKCS12_x5092certbag PKCS12_SAFEBAG_create_cert +# define PKCS12_x509crl2certbag PKCS12_SAFEBAG_create_crl +# define PKCS12_MAKE_KEYBAG PKCS12_SAFEBAG_create0_p8inf +# define PKCS12_MAKE_SHKEYBAG PKCS12_SAFEBAG_create_pkcs8_encrypt + +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 ASN1_TYPE *PKCS12_get_attr(const PKCS12_SAFEBAG *bag, + int attr_nid); +#endif + +ASN1_TYPE *PKCS8_get_attr(PKCS8_PRIV_KEY_INFO *p8, int attr_nid); +int PKCS12_mac_present(const PKCS12 *p12); +void PKCS12_get0_mac(const ASN1_OCTET_STRING **pmac, + const X509_ALGOR **pmacalg, + const ASN1_OCTET_STRING **psalt, + const ASN1_INTEGER **piter, + const PKCS12 *p12); + +const ASN1_TYPE *PKCS12_SAFEBAG_get0_attr(const PKCS12_SAFEBAG *bag, + int attr_nid); +const ASN1_OBJECT *PKCS12_SAFEBAG_get0_type(const PKCS12_SAFEBAG *bag); +int PKCS12_SAFEBAG_get_nid(const PKCS12_SAFEBAG *bag); +int PKCS12_SAFEBAG_get_bag_nid(const PKCS12_SAFEBAG *bag); +const ASN1_TYPE *PKCS12_SAFEBAG_get0_bag_obj(const PKCS12_SAFEBAG *bag); +const ASN1_OBJECT *PKCS12_SAFEBAG_get0_bag_type(const PKCS12_SAFEBAG *bag); + +X509 *PKCS12_SAFEBAG_get1_cert_ex(const PKCS12_SAFEBAG *bag, OSSL_LIB_CTX *libctx, const char *propq); +X509 *PKCS12_SAFEBAG_get1_cert(const PKCS12_SAFEBAG *bag); +X509_CRL *PKCS12_SAFEBAG_get1_crl_ex(const PKCS12_SAFEBAG *bag, OSSL_LIB_CTX *libctx, const char *propq); +X509_CRL *PKCS12_SAFEBAG_get1_crl(const PKCS12_SAFEBAG *bag); +const STACK_OF(PKCS12_SAFEBAG) * +PKCS12_SAFEBAG_get0_safes(const PKCS12_SAFEBAG *bag); +const PKCS8_PRIV_KEY_INFO *PKCS12_SAFEBAG_get0_p8inf(const PKCS12_SAFEBAG *bag); +const X509_SIG *PKCS12_SAFEBAG_get0_pkcs8(const PKCS12_SAFEBAG *bag); + +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_cert(X509 *x509); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_crl(X509_CRL *crl); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_secret(int type, int vtype, const unsigned char *value, int len); +PKCS12_SAFEBAG 
*PKCS12_SAFEBAG_create0_p8inf(PKCS8_PRIV_KEY_INFO *p8); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create0_pkcs8(X509_SIG *p8); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_pkcs8_encrypt(int pbe_nid, + const char *pass, + int passlen, + unsigned char *salt, + int saltlen, int iter, + PKCS8_PRIV_KEY_INFO *p8inf); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_pkcs8_encrypt_ex(int pbe_nid, + const char *pass, + int passlen, + unsigned char *salt, + int saltlen, int iter, + PKCS8_PRIV_KEY_INFO *p8inf, + OSSL_LIB_CTX *ctx, + const char *propq); + +PKCS12_SAFEBAG *PKCS12_item_pack_safebag(void *obj, const ASN1_ITEM *it, + int nid1, int nid2); +PKCS8_PRIV_KEY_INFO *PKCS8_decrypt(const X509_SIG *p8, const char *pass, + int passlen); +PKCS8_PRIV_KEY_INFO *PKCS8_decrypt_ex(const X509_SIG *p8, const char *pass, + int passlen, OSSL_LIB_CTX *ctx, + const char *propq); +PKCS8_PRIV_KEY_INFO *PKCS12_decrypt_skey(const PKCS12_SAFEBAG *bag, + const char *pass, int passlen); +PKCS8_PRIV_KEY_INFO *PKCS12_decrypt_skey_ex(const PKCS12_SAFEBAG *bag, + const char *pass, int passlen, + OSSL_LIB_CTX *ctx, + const char *propq); +X509_SIG *PKCS8_encrypt(int pbe_nid, const EVP_CIPHER *cipher, + const char *pass, int passlen, unsigned char *salt, + int saltlen, int iter, PKCS8_PRIV_KEY_INFO *p8); +X509_SIG *PKCS8_encrypt_ex(int pbe_nid, const EVP_CIPHER *cipher, + const char *pass, int passlen, unsigned char *salt, + int saltlen, int iter, PKCS8_PRIV_KEY_INFO *p8, + OSSL_LIB_CTX *ctx, const char *propq); +X509_SIG *PKCS8_set0_pbe(const char *pass, int passlen, + PKCS8_PRIV_KEY_INFO *p8inf, X509_ALGOR *pbe); +X509_SIG *PKCS8_set0_pbe_ex(const char *pass, int passlen, + PKCS8_PRIV_KEY_INFO *p8inf, X509_ALGOR *pbe, + OSSL_LIB_CTX *ctx, const char *propq); +PKCS7 *PKCS12_pack_p7data(STACK_OF(PKCS12_SAFEBAG) *sk); +STACK_OF(PKCS12_SAFEBAG) *PKCS12_unpack_p7data(PKCS7 *p7); +PKCS7 *PKCS12_pack_p7encdata(int pbe_nid, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + STACK_OF(PKCS12_SAFEBAG) *bags); +PKCS7 *PKCS12_pack_p7encdata_ex(int pbe_nid, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + STACK_OF(PKCS12_SAFEBAG) *bags, + OSSL_LIB_CTX *ctx, const char *propq); + +STACK_OF(PKCS12_SAFEBAG) *PKCS12_unpack_p7encdata(PKCS7 *p7, const char *pass, + int passlen); + +int PKCS12_pack_authsafes(PKCS12 *p12, STACK_OF(PKCS7) *safes); +STACK_OF(PKCS7) *PKCS12_unpack_authsafes(const PKCS12 *p12); + +int PKCS12_add_localkeyid(PKCS12_SAFEBAG *bag, unsigned char *name, + int namelen); +int PKCS12_add_friendlyname_asc(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_friendlyname_utf8(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_CSPName_asc(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_friendlyname_uni(PKCS12_SAFEBAG *bag, + const unsigned char *name, int namelen); +int PKCS12_add1_attr_by_NID(PKCS12_SAFEBAG *bag, int nid, int type, + const unsigned char *bytes, int len); +int PKCS12_add1_attr_by_txt(PKCS12_SAFEBAG *bag, const char *attrname, int type, + const unsigned char *bytes, int len); +int PKCS8_add_keyusage(PKCS8_PRIV_KEY_INFO *p8, int usage); +ASN1_TYPE *PKCS12_get_attr_gen(const STACK_OF(X509_ATTRIBUTE) *attrs, + int attr_nid); +char *PKCS12_get_friendlyname(PKCS12_SAFEBAG *bag); +const STACK_OF(X509_ATTRIBUTE) * +PKCS12_SAFEBAG_get0_attrs(const PKCS12_SAFEBAG *bag); +void PKCS12_SAFEBAG_set0_attrs(PKCS12_SAFEBAG *bag, STACK_OF(X509_ATTRIBUTE) *attrs); +unsigned char *PKCS12_pbe_crypt(const X509_ALGOR *algor, + const char 
*pass, int passlen, + const unsigned char *in, int inlen, + unsigned char **data, int *datalen, + int en_de); +unsigned char *PKCS12_pbe_crypt_ex(const X509_ALGOR *algor, + const char *pass, int passlen, + const unsigned char *in, int inlen, + unsigned char **data, int *datalen, + int en_de, OSSL_LIB_CTX *libctx, + const char *propq); +void *PKCS12_item_decrypt_d2i(const X509_ALGOR *algor, const ASN1_ITEM *it, + const char *pass, int passlen, + const ASN1_OCTET_STRING *oct, int zbuf); +void *PKCS12_item_decrypt_d2i_ex(const X509_ALGOR *algor, const ASN1_ITEM *it, + const char *pass, int passlen, + const ASN1_OCTET_STRING *oct, int zbuf, + OSSL_LIB_CTX *libctx, + const char *propq); +ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt(X509_ALGOR *algor, + const ASN1_ITEM *it, + const char *pass, int passlen, + void *obj, int zbuf); +ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt_ex(X509_ALGOR *algor, + const ASN1_ITEM *it, + const char *pass, int passlen, + void *obj, int zbuf, + OSSL_LIB_CTX *ctx, + const char *propq); +PKCS12 *PKCS12_init(int mode); +PKCS12 *PKCS12_init_ex(int mode, OSSL_LIB_CTX *ctx, const char *propq); + +int PKCS12_key_gen_asc(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_asc_ex(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); +int PKCS12_key_gen_uni(unsigned char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_uni_ex(unsigned char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); +int PKCS12_key_gen_utf8(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_utf8_ex(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); + +int PKCS12_PBE_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, + ASN1_TYPE *param, const EVP_CIPHER *cipher, + const EVP_MD *md_type, int en_de); +int PKCS12_PBE_keyivgen_ex(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, + ASN1_TYPE *param, const EVP_CIPHER *cipher, + const EVP_MD *md_type, int en_de, + OSSL_LIB_CTX *libctx, const char *propq); +int PKCS12_gen_mac(PKCS12 *p12, const char *pass, int passlen, + unsigned char *mac, unsigned int *maclen); +int PKCS12_verify_mac(PKCS12 *p12, const char *pass, int passlen); +int PKCS12_set_mac(PKCS12 *p12, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + const EVP_MD *md_type); +int PKCS12_setup_mac(PKCS12 *p12, int iter, unsigned char *salt, + int saltlen, const EVP_MD *md_type); +unsigned char *OPENSSL_asc2uni(const char *asc, int asclen, + unsigned char **uni, int *unilen); +char *OPENSSL_uni2asc(const unsigned char *uni, int unilen); +unsigned char *OPENSSL_utf82uni(const char *asc, int asclen, + unsigned char **uni, int *unilen); +char *OPENSSL_uni2utf8(const unsigned char *uni, int unilen); + +DECLARE_ASN1_FUNCTIONS(PKCS12) +DECLARE_ASN1_FUNCTIONS(PKCS12_MAC_DATA) +DECLARE_ASN1_FUNCTIONS(PKCS12_SAFEBAG) +DECLARE_ASN1_FUNCTIONS(PKCS12_BAGS) + +DECLARE_ASN1_ITEM(PKCS12_SAFEBAGS) 
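/*
 * [Editor's aside — an illustrative sketch, not part of the vendored header
 * or of this patch.]  The PKCS#12 MAC functions declared above
 * (PKCS12_mac_present(), PKCS12_verify_mac()) are typically combined with
 * d2i_PKCS12_bio(), declared a little further down in this header, to check
 * a PKCS#12 password before unpacking the bags.  The function name and the
 * idea of taking a file path are hypothetical, introduced only for this
 * example.
 */
#include <openssl/bio.h>
#include <openssl/pkcs12.h>

static int check_p12_password(const char *path, const char *pass)
{
    BIO *in = BIO_new_file(path, "rb");
    PKCS12 *p12;
    int ok = 0;

    if (in == NULL)
        return 0;
    p12 = d2i_PKCS12_bio(in, NULL);   /* parse the DER-encoded PKCS#12 blob */
    BIO_free(in);
    if (p12 == NULL)
        return 0;
    if (!PKCS12_mac_present(p12))
        ok = 1;                       /* no MAC attached, nothing to verify */
    else
        ok = PKCS12_verify_mac(p12, pass, -1); /* -1: pass is NUL-terminated */
    PKCS12_free(p12);
    return ok;
}
/* [End of editor's aside; the vendored header resumes below.] */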
+DECLARE_ASN1_ITEM(PKCS12_AUTHSAFES) + +void PKCS12_PBE_add(void); +int PKCS12_parse(PKCS12 *p12, const char *pass, EVP_PKEY **pkey, X509 **cert, + STACK_OF(X509) **ca); +typedef int PKCS12_create_cb(PKCS12_SAFEBAG *bag, void *cbarg); +PKCS12 *PKCS12_create(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, int nid_cert, + int iter, int mac_iter, int keytype); +PKCS12 *PKCS12_create_ex(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, int nid_cert, + int iter, int mac_iter, int keytype, + OSSL_LIB_CTX *ctx, const char *propq); +PKCS12 *PKCS12_create_ex2(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, int nid_cert, + int iter, int mac_iter, int keytype, + OSSL_LIB_CTX *ctx, const char *propq, + PKCS12_create_cb *cb, void *cbarg); + +PKCS12_SAFEBAG *PKCS12_add_cert(STACK_OF(PKCS12_SAFEBAG) **pbags, X509 *cert); +PKCS12_SAFEBAG *PKCS12_add_key(STACK_OF(PKCS12_SAFEBAG) **pbags, + EVP_PKEY *key, int key_usage, int iter, + int key_nid, const char *pass); +PKCS12_SAFEBAG *PKCS12_add_key_ex(STACK_OF(PKCS12_SAFEBAG) **pbags, + EVP_PKEY *key, int key_usage, int iter, + int key_nid, const char *pass, + OSSL_LIB_CTX *ctx, const char *propq); + +PKCS12_SAFEBAG *PKCS12_add_secret(STACK_OF(PKCS12_SAFEBAG) **pbags, + int nid_type, const unsigned char *value, int len); +int PKCS12_add_safe(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags, + int safe_nid, int iter, const char *pass); +int PKCS12_add_safe_ex(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags, + int safe_nid, int iter, const char *pass, + OSSL_LIB_CTX *ctx, const char *propq); + +PKCS12 *PKCS12_add_safes(STACK_OF(PKCS7) *safes, int p7_nid); +PKCS12 *PKCS12_add_safes_ex(STACK_OF(PKCS7) *safes, int p7_nid, + OSSL_LIB_CTX *ctx, const char *propq); + +int i2d_PKCS12_bio(BIO *bp, const PKCS12 *p12); +# ifndef OPENSSL_NO_STDIO +int i2d_PKCS12_fp(FILE *fp, const PKCS12 *p12); +# endif +PKCS12 *d2i_PKCS12_bio(BIO *bp, PKCS12 **p12); +# ifndef OPENSSL_NO_STDIO +PKCS12 *d2i_PKCS12_fp(FILE *fp, PKCS12 **p12); +# endif +int PKCS12_newpass(PKCS12 *p12, const char *oldpass, const char *newpass); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs7.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs7.h new file mode 100644 index 00000000000..dc46c51118a --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/pkcs7.h @@ -0,0 +1,430 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/pkcs7.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_PKCS7_H +# define OPENSSL_PKCS7_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_PKCS7_H +# endif + +# include +# include +# include + +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + + +/*- +Encryption_ID DES-CBC +Digest_ID MD5 +Digest_Encryption_ID rsaEncryption +Key_Encryption_ID rsaEncryption +*/ + +typedef struct PKCS7_CTX_st { + OSSL_LIB_CTX *libctx; + char *propq; +} PKCS7_CTX; + +typedef struct pkcs7_issuer_and_serial_st { + X509_NAME *issuer; + ASN1_INTEGER *serial; +} PKCS7_ISSUER_AND_SERIAL; + +typedef struct pkcs7_signer_info_st { + ASN1_INTEGER *version; /* version 1 */ + PKCS7_ISSUER_AND_SERIAL *issuer_and_serial; + X509_ALGOR *digest_alg; + STACK_OF(X509_ATTRIBUTE) *auth_attr; /* [ 0 ] */ + X509_ALGOR *digest_enc_alg; /* confusing name, actually used for signing */ + ASN1_OCTET_STRING *enc_digest; /* confusing name, actually signature */ + STACK_OF(X509_ATTRIBUTE) *unauth_attr; /* [ 1 ] */ + /* The private key to sign with */ + EVP_PKEY *pkey; + const PKCS7_CTX *ctx; +} PKCS7_SIGNER_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7_SIGNER_INFO, PKCS7_SIGNER_INFO, PKCS7_SIGNER_INFO) +#define sk_PKCS7_SIGNER_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_value(sk, idx) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_value(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk), (idx))) +#define sk_PKCS7_SIGNER_INFO_new(cmp) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new(ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp))) +#define sk_PKCS7_SIGNER_INFO_new_null() ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_SIGNER_INFO_new_reserve(cmp, n) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp), (n))) +#define sk_PKCS7_SIGNER_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (n)) +#define sk_PKCS7_SIGNER_INFO_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_delete(sk, i) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_delete(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (i))) +#define sk_PKCS7_SIGNER_INFO_delete_ptr(sk, ptr) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr))) +#define sk_PKCS7_SIGNER_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_pop(sk) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_pop(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_shift(sk) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_shift(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk),ossl_check_PKCS7_SIGNER_INFO_freefunc_type(freefunc)) +#define sk_PKCS7_SIGNER_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr), (idx)) +#define sk_PKCS7_SIGNER_INFO_set(sk, idx, ptr) ((PKCS7_SIGNER_INFO 
*)OPENSSL_sk_set(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (idx), ossl_check_PKCS7_SIGNER_INFO_type(ptr))) +#define sk_PKCS7_SIGNER_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr), pnum) +#define sk_PKCS7_SIGNER_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_dup(sk) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_copyfunc_type(copyfunc), ossl_check_PKCS7_SIGNER_INFO_freefunc_type(freefunc))) +#define sk_PKCS7_SIGNER_INFO_set_cmp_func(sk, cmp) ((sk_PKCS7_SIGNER_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp))) + + +typedef struct pkcs7_recip_info_st { + ASN1_INTEGER *version; /* version 0 */ + PKCS7_ISSUER_AND_SERIAL *issuer_and_serial; + X509_ALGOR *key_enc_algor; + ASN1_OCTET_STRING *enc_key; + X509 *cert; /* get the pub-key from this */ + const PKCS7_CTX *ctx; +} PKCS7_RECIP_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7_RECIP_INFO, PKCS7_RECIP_INFO, PKCS7_RECIP_INFO) +#define sk_PKCS7_RECIP_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_value(sk, idx) ((PKCS7_RECIP_INFO *)OPENSSL_sk_value(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk), (idx))) +#define sk_PKCS7_RECIP_INFO_new(cmp) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new(ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp))) +#define sk_PKCS7_RECIP_INFO_new_null() ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_RECIP_INFO_new_reserve(cmp, n) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp), (n))) +#define sk_PKCS7_RECIP_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (n)) +#define sk_PKCS7_RECIP_INFO_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_delete(sk, i) ((PKCS7_RECIP_INFO *)OPENSSL_sk_delete(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (i))) +#define sk_PKCS7_RECIP_INFO_delete_ptr(sk, ptr) ((PKCS7_RECIP_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr))) +#define sk_PKCS7_RECIP_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_pop(sk) ((PKCS7_RECIP_INFO *)OPENSSL_sk_pop(ossl_check_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_shift(sk) ((PKCS7_RECIP_INFO *)OPENSSL_sk_shift(ossl_check_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_PKCS7_RECIP_INFO_sk_type(sk),ossl_check_PKCS7_RECIP_INFO_freefunc_type(freefunc)) +#define sk_PKCS7_RECIP_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr), (idx)) +#define sk_PKCS7_RECIP_INFO_set(sk, idx, ptr) ((PKCS7_RECIP_INFO *)OPENSSL_sk_set(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (idx), ossl_check_PKCS7_RECIP_INFO_type(ptr))) +#define sk_PKCS7_RECIP_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr), pnum) +#define sk_PKCS7_RECIP_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_dup(sk) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_copyfunc_type(copyfunc), ossl_check_PKCS7_RECIP_INFO_freefunc_type(freefunc))) +#define sk_PKCS7_RECIP_INFO_set_cmp_func(sk, cmp) ((sk_PKCS7_RECIP_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp))) + + + +typedef struct pkcs7_signed_st { + ASN1_INTEGER *version; /* version 1 */ + STACK_OF(X509_ALGOR) *md_algs; /* md used */ + STACK_OF(X509) *cert; /* [ 0 ] */ /* name should be 'certificates' */ + STACK_OF(X509_CRL) *crl; /* [ 1 ] */ /* name should be 'crls' */ + STACK_OF(PKCS7_SIGNER_INFO) *signer_info; + struct pkcs7_st *contents; +} PKCS7_SIGNED; +/* + * The above structure is very very similar to PKCS7_SIGN_ENVELOPE. 
How about + * merging the two + */ + +typedef struct pkcs7_enc_content_st { + ASN1_OBJECT *content_type; + X509_ALGOR *algorithm; + ASN1_OCTET_STRING *enc_data; /* [ 0 ] */ + const EVP_CIPHER *cipher; + const PKCS7_CTX *ctx; +} PKCS7_ENC_CONTENT; + +typedef struct pkcs7_enveloped_st { + ASN1_INTEGER *version; /* version 0 */ + STACK_OF(PKCS7_RECIP_INFO) *recipientinfo; + PKCS7_ENC_CONTENT *enc_data; +} PKCS7_ENVELOPE; + +typedef struct pkcs7_signedandenveloped_st { + ASN1_INTEGER *version; /* version 1 */ + STACK_OF(X509_ALGOR) *md_algs; /* md used */ + STACK_OF(X509) *cert; /* [ 0 ] */ /* name should be 'certificates' */ + STACK_OF(X509_CRL) *crl; /* [ 1 ] */ /* name should be 'crls' */ + STACK_OF(PKCS7_SIGNER_INFO) *signer_info; + PKCS7_ENC_CONTENT *enc_data; + STACK_OF(PKCS7_RECIP_INFO) *recipientinfo; +} PKCS7_SIGN_ENVELOPE; + +typedef struct pkcs7_digest_st { + ASN1_INTEGER *version; /* version 0 */ + X509_ALGOR *md; /* md used */ + struct pkcs7_st *contents; + ASN1_OCTET_STRING *digest; +} PKCS7_DIGEST; + +typedef struct pkcs7_encrypted_st { + ASN1_INTEGER *version; /* version 0 */ + PKCS7_ENC_CONTENT *enc_data; +} PKCS7_ENCRYPT; + +typedef struct pkcs7_st { + /* + * The following is non NULL if it contains ASN1 encoding of this + * structure + */ + unsigned char *asn1; + long length; +# define PKCS7_S_HEADER 0 +# define PKCS7_S_BODY 1 +# define PKCS7_S_TAIL 2 + int state; /* used during processing */ + int detached; + ASN1_OBJECT *type; + /* content as defined by the type */ + /* + * all encryption/message digests are applied to the 'contents', leaving + * out the 'type' field. + */ + union { + char *ptr; + /* NID_pkcs7_data */ + ASN1_OCTET_STRING *data; + /* NID_pkcs7_signed */ + PKCS7_SIGNED *sign; /* field name 'signed' would clash with C keyword */ + /* NID_pkcs7_enveloped */ + PKCS7_ENVELOPE *enveloped; + /* NID_pkcs7_signedAndEnveloped */ + PKCS7_SIGN_ENVELOPE *signed_and_enveloped; + /* NID_pkcs7_digest */ + PKCS7_DIGEST *digest; + /* NID_pkcs7_encrypted */ + PKCS7_ENCRYPT *encrypted; + /* Anything else */ + ASN1_TYPE *other; + } d; + PKCS7_CTX ctx; +} PKCS7; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7, PKCS7, PKCS7) +#define sk_PKCS7_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_sk_type(sk)) +#define sk_PKCS7_value(sk, idx) ((PKCS7 *)OPENSSL_sk_value(ossl_check_const_PKCS7_sk_type(sk), (idx))) +#define sk_PKCS7_new(cmp) ((STACK_OF(PKCS7) *)OPENSSL_sk_new(ossl_check_PKCS7_compfunc_type(cmp))) +#define sk_PKCS7_new_null() ((STACK_OF(PKCS7) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_new_reserve(cmp, n) ((STACK_OF(PKCS7) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_compfunc_type(cmp), (n))) +#define sk_PKCS7_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_sk_type(sk), (n)) +#define sk_PKCS7_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_delete(sk, i) ((PKCS7 *)OPENSSL_sk_delete(ossl_check_PKCS7_sk_type(sk), (i))) +#define sk_PKCS7_delete_ptr(sk, ptr) ((PKCS7 *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr))) +#define sk_PKCS7_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_pop(sk) ((PKCS7 *)OPENSSL_sk_pop(ossl_check_PKCS7_sk_type(sk))) +#define sk_PKCS7_shift(sk) ((PKCS7 *)OPENSSL_sk_shift(ossl_check_PKCS7_sk_type(sk))) +#define sk_PKCS7_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_PKCS7_sk_type(sk),ossl_check_PKCS7_freefunc_type(freefunc)) +#define sk_PKCS7_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr), (idx)) +#define sk_PKCS7_set(sk, idx, ptr) ((PKCS7 *)OPENSSL_sk_set(ossl_check_PKCS7_sk_type(sk), (idx), ossl_check_PKCS7_type(ptr))) +#define sk_PKCS7_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr), pnum) +#define sk_PKCS7_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_sk_type(sk)) +#define sk_PKCS7_dup(sk) ((STACK_OF(PKCS7) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_sk_type(sk))) +#define sk_PKCS7_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_sk_type(sk), ossl_check_PKCS7_copyfunc_type(copyfunc), ossl_check_PKCS7_freefunc_type(freefunc))) +#define sk_PKCS7_set_cmp_func(sk, cmp) ((sk_PKCS7_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_compfunc_type(cmp))) + + + +# define PKCS7_OP_SET_DETACHED_SIGNATURE 1 +# define PKCS7_OP_GET_DETACHED_SIGNATURE 2 + +# define PKCS7_get_signed_attributes(si) ((si)->auth_attr) +# define PKCS7_get_attributes(si) ((si)->unauth_attr) + +# define PKCS7_type_is_signed(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_signed) +# define PKCS7_type_is_encrypted(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_encrypted) +# define PKCS7_type_is_enveloped(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_enveloped) +# define PKCS7_type_is_signedAndEnveloped(a) \ + (OBJ_obj2nid((a)->type) == NID_pkcs7_signedAndEnveloped) +# define PKCS7_type_is_data(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_data) +# define PKCS7_type_is_digest(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_digest) + +# define PKCS7_set_detached(p,v) \ + PKCS7_ctrl(p,PKCS7_OP_SET_DETACHED_SIGNATURE,v,NULL) +# define PKCS7_get_detached(p) \ + PKCS7_ctrl(p,PKCS7_OP_GET_DETACHED_SIGNATURE,0,NULL) + +# define PKCS7_is_detached(p7) (PKCS7_type_is_signed(p7) && PKCS7_get_detached(p7)) + +/* S/MIME related flags */ + +# define PKCS7_TEXT 0x1 +# define PKCS7_NOCERTS 0x2 +# define PKCS7_NOSIGS 0x4 +# define PKCS7_NOCHAIN 0x8 +# define PKCS7_NOINTERN 0x10 +# define PKCS7_NOVERIFY 0x20 +# define PKCS7_DETACHED 0x40 +# define PKCS7_BINARY 0x80 +# define PKCS7_NOATTR 0x100 +# define PKCS7_NOSMIMECAP 0x200 +# define PKCS7_NOOLDMIMETYPE 0x400 +# define PKCS7_CRLFEOL 0x800 +# define PKCS7_STREAM 0x1000 +# define PKCS7_NOCRL 0x2000 +# define PKCS7_PARTIAL 0x4000 +# define PKCS7_REUSE_DIGEST 0x8000 +# define PKCS7_NO_DUAL_CONTENT 0x10000 + +/* Flags: for compatibility with older code */ + +# define SMIME_TEXT PKCS7_TEXT +# define SMIME_NOCERTS PKCS7_NOCERTS +# define SMIME_NOSIGS PKCS7_NOSIGS +# define SMIME_NOCHAIN PKCS7_NOCHAIN +# define SMIME_NOINTERN PKCS7_NOINTERN +# define SMIME_NOVERIFY PKCS7_NOVERIFY +# define SMIME_DETACHED PKCS7_DETACHED +# define SMIME_BINARY PKCS7_BINARY +# define SMIME_NOATTR PKCS7_NOATTR + +/* CRLF ASCII canonicalisation */ +# define SMIME_ASCIICRLF 0x80000 + +DECLARE_ASN1_FUNCTIONS(PKCS7_ISSUER_AND_SERIAL) + +int PKCS7_ISSUER_AND_SERIAL_digest(PKCS7_ISSUER_AND_SERIAL *data, + const EVP_MD *type, unsigned char *md, + unsigned int *len); +# ifndef OPENSSL_NO_STDIO +PKCS7 *d2i_PKCS7_fp(FILE *fp, 
PKCS7 **p7); +int i2d_PKCS7_fp(FILE *fp, const PKCS7 *p7); +# endif +DECLARE_ASN1_DUP_FUNCTION(PKCS7) +PKCS7 *d2i_PKCS7_bio(BIO *bp, PKCS7 **p7); +int i2d_PKCS7_bio(BIO *bp, const PKCS7 *p7); +int i2d_PKCS7_bio_stream(BIO *out, PKCS7 *p7, BIO *in, int flags); +int PEM_write_bio_PKCS7_stream(BIO *out, PKCS7 *p7, BIO *in, int flags); + +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGNER_INFO) +DECLARE_ASN1_FUNCTIONS(PKCS7_RECIP_INFO) +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGNED) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENC_CONTENT) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENVELOPE) +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGN_ENVELOPE) +DECLARE_ASN1_FUNCTIONS(PKCS7_DIGEST) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENCRYPT) +DECLARE_ASN1_FUNCTIONS(PKCS7) +PKCS7 *PKCS7_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +DECLARE_ASN1_ITEM(PKCS7_ATTR_SIGN) +DECLARE_ASN1_ITEM(PKCS7_ATTR_VERIFY) + +DECLARE_ASN1_NDEF_FUNCTION(PKCS7) +DECLARE_ASN1_PRINT_FUNCTION(PKCS7) + +long PKCS7_ctrl(PKCS7 *p7, int cmd, long larg, char *parg); + +int PKCS7_type_is_other(PKCS7 *p7); +int PKCS7_set_type(PKCS7 *p7, int type); +int PKCS7_set0_type_other(PKCS7 *p7, int type, ASN1_TYPE *other); +int PKCS7_set_content(PKCS7 *p7, PKCS7 *p7_data); +int PKCS7_SIGNER_INFO_set(PKCS7_SIGNER_INFO *p7i, X509 *x509, EVP_PKEY *pkey, + const EVP_MD *dgst); +int PKCS7_SIGNER_INFO_sign(PKCS7_SIGNER_INFO *si); +int PKCS7_add_signer(PKCS7 *p7, PKCS7_SIGNER_INFO *p7i); +int PKCS7_add_certificate(PKCS7 *p7, X509 *cert); +int PKCS7_add_crl(PKCS7 *p7, X509_CRL *crl); +int PKCS7_content_new(PKCS7 *p7, int nid); +int PKCS7_dataVerify(X509_STORE *cert_store, X509_STORE_CTX *ctx, + BIO *bio, PKCS7 *p7, PKCS7_SIGNER_INFO *si); +int PKCS7_signatureVerify(BIO *bio, PKCS7 *p7, PKCS7_SIGNER_INFO *si, + X509 *signer); + +BIO *PKCS7_dataInit(PKCS7 *p7, BIO *bio); +int PKCS7_dataFinal(PKCS7 *p7, BIO *bio); +BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert); + +PKCS7_SIGNER_INFO *PKCS7_add_signature(PKCS7 *p7, X509 *x509, + EVP_PKEY *pkey, const EVP_MD *dgst); +X509 *PKCS7_cert_from_signer_info(PKCS7 *p7, PKCS7_SIGNER_INFO *si); +int PKCS7_set_digest(PKCS7 *p7, const EVP_MD *md); +STACK_OF(PKCS7_SIGNER_INFO) *PKCS7_get_signer_info(PKCS7 *p7); + +PKCS7_RECIP_INFO *PKCS7_add_recipient(PKCS7 *p7, X509 *x509); +void PKCS7_SIGNER_INFO_get0_algs(PKCS7_SIGNER_INFO *si, EVP_PKEY **pk, + X509_ALGOR **pdig, X509_ALGOR **psig); +void PKCS7_RECIP_INFO_get0_alg(PKCS7_RECIP_INFO *ri, X509_ALGOR **penc); +int PKCS7_add_recipient_info(PKCS7 *p7, PKCS7_RECIP_INFO *ri); +int PKCS7_RECIP_INFO_set(PKCS7_RECIP_INFO *p7i, X509 *x509); +int PKCS7_set_cipher(PKCS7 *p7, const EVP_CIPHER *cipher); +int PKCS7_stream(unsigned char ***boundary, PKCS7 *p7); + +PKCS7_ISSUER_AND_SERIAL *PKCS7_get_issuer_and_serial(PKCS7 *p7, int idx); +ASN1_OCTET_STRING *PKCS7_get_octet_string(PKCS7 *p7); +ASN1_OCTET_STRING *PKCS7_digest_from_attributes(STACK_OF(X509_ATTRIBUTE) *sk); +int PKCS7_add_signed_attribute(PKCS7_SIGNER_INFO *p7si, int nid, int type, + void *data); +int PKCS7_add_attribute(PKCS7_SIGNER_INFO *p7si, int nid, int atrtype, + void *value); +ASN1_TYPE *PKCS7_get_attribute(const PKCS7_SIGNER_INFO *si, int nid); +ASN1_TYPE *PKCS7_get_signed_attribute(const PKCS7_SIGNER_INFO *si, int nid); +int PKCS7_set_signed_attributes(PKCS7_SIGNER_INFO *p7si, + STACK_OF(X509_ATTRIBUTE) *sk); +int PKCS7_set_attributes(PKCS7_SIGNER_INFO *p7si, + STACK_OF(X509_ATTRIBUTE) *sk); + +PKCS7 *PKCS7_sign(X509 *signcert, EVP_PKEY *pkey, STACK_OF(X509) *certs, + BIO *data, int flags); +PKCS7 *PKCS7_sign_ex(X509 *signcert, EVP_PKEY *pkey, 
STACK_OF(X509) *certs, + BIO *data, int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +PKCS7_SIGNER_INFO *PKCS7_sign_add_signer(PKCS7 *p7, + X509 *signcert, EVP_PKEY *pkey, + const EVP_MD *md, int flags); + +int PKCS7_final(PKCS7 *p7, BIO *data, int flags); +int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, + BIO *indata, BIO *out, int flags); +STACK_OF(X509) *PKCS7_get0_signers(PKCS7 *p7, STACK_OF(X509) *certs, + int flags); +PKCS7 *PKCS7_encrypt(STACK_OF(X509) *certs, BIO *in, const EVP_CIPHER *cipher, + int flags); +PKCS7 *PKCS7_encrypt_ex(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, int flags, + OSSL_LIB_CTX *libctx, const char *propq); +int PKCS7_decrypt(PKCS7 *p7, EVP_PKEY *pkey, X509 *cert, BIO *data, + int flags); + +int PKCS7_add_attrib_smimecap(PKCS7_SIGNER_INFO *si, + STACK_OF(X509_ALGOR) *cap); +STACK_OF(X509_ALGOR) *PKCS7_get_smimecap(PKCS7_SIGNER_INFO *si); +int PKCS7_simple_smimecap(STACK_OF(X509_ALGOR) *sk, int nid, int arg); + +int PKCS7_add_attrib_content_type(PKCS7_SIGNER_INFO *si, ASN1_OBJECT *coid); +int PKCS7_add0_attrib_signing_time(PKCS7_SIGNER_INFO *si, ASN1_TIME *t); +int PKCS7_add1_attrib_digest(PKCS7_SIGNER_INFO *si, + const unsigned char *md, int mdlen); + +int SMIME_write_PKCS7(BIO *bio, PKCS7 *p7, BIO *data, int flags); +PKCS7 *SMIME_read_PKCS7_ex(BIO *bio, BIO **bcont, PKCS7 **p7); +PKCS7 *SMIME_read_PKCS7(BIO *bio, BIO **bcont); + +BIO *BIO_new_PKCS7(BIO *out, PKCS7 *p7); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/safestack.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/safestack.h new file mode 100644 index 00000000000..0499700b562 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/safestack.h @@ -0,0 +1,297 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/safestack.h.in + * + * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_SAFESTACK_H +# define OPENSSL_SAFESTACK_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SAFESTACK_H +# endif + +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +# define STACK_OF(type) struct stack_st_##type + +/* Helper macro for internal use */ +# define SKM_DEFINE_STACK_OF_INTERNAL(t1, t2, t3) \ + STACK_OF(t1); \ + typedef int (*sk_##t1##_compfunc)(const t3 * const *a, const t3 *const *b); \ + typedef void (*sk_##t1##_freefunc)(t3 *a); \ + typedef t3 * (*sk_##t1##_copyfunc)(const t3 *a); \ + static ossl_unused ossl_inline t2 *ossl_check_##t1##_type(t2 *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const OPENSSL_STACK *ossl_check_const_##t1##_sk_type(const STACK_OF(t1) *sk) \ + { \ + return (const OPENSSL_STACK *)sk; \ + } \ + static ossl_unused ossl_inline OPENSSL_STACK *ossl_check_##t1##_sk_type(STACK_OF(t1) *sk) \ + { \ + return (OPENSSL_STACK *)sk; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_compfunc ossl_check_##t1##_compfunc_type(sk_##t1##_compfunc cmp) \ + { \ + return (OPENSSL_sk_compfunc)cmp; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_copyfunc ossl_check_##t1##_copyfunc_type(sk_##t1##_copyfunc cpy) \ + { \ + return (OPENSSL_sk_copyfunc)cpy; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_freefunc ossl_check_##t1##_freefunc_type(sk_##t1##_freefunc fr) \ + { \ + return (OPENSSL_sk_freefunc)fr; \ + } + +# define SKM_DEFINE_STACK_OF(t1, t2, t3) \ + STACK_OF(t1); \ + typedef int (*sk_##t1##_compfunc)(const t3 * const *a, const t3 *const *b); \ + typedef void (*sk_##t1##_freefunc)(t3 *a); \ + typedef t3 * (*sk_##t1##_copyfunc)(const t3 *a); \ + static ossl_unused ossl_inline int sk_##t1##_num(const STACK_OF(t1) *sk) \ + { \ + return OPENSSL_sk_num((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_value(const STACK_OF(t1) *sk, int idx) \ + { \ + return (t2 *)OPENSSL_sk_value((const OPENSSL_STACK *)sk, idx); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new(sk_##t1##_compfunc compare) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new((OPENSSL_sk_compfunc)compare); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new_null(void) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new_null(); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new_reserve(sk_##t1##_compfunc compare, int n) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new_reserve((OPENSSL_sk_compfunc)compare, n); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_reserve(STACK_OF(t1) *sk, int n) \ + { \ + return OPENSSL_sk_reserve((OPENSSL_STACK *)sk, n); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_free(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_free((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_zero(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_zero((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_delete(STACK_OF(t1) *sk, int i) \ + { \ + return (t2 *)OPENSSL_sk_delete((OPENSSL_STACK *)sk, i); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_delete_ptr(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return (t2 *)OPENSSL_sk_delete_ptr((OPENSSL_STACK *)sk, \ + (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_push(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_push((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + 
static ossl_unused ossl_inline int sk_##t1##_unshift(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_unshift((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_pop(STACK_OF(t1) *sk) \ + { \ + return (t2 *)OPENSSL_sk_pop((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_shift(STACK_OF(t1) *sk) \ + { \ + return (t2 *)OPENSSL_sk_shift((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_pop_free(STACK_OF(t1) *sk, sk_##t1##_freefunc freefunc) \ + { \ + OPENSSL_sk_pop_free((OPENSSL_STACK *)sk, (OPENSSL_sk_freefunc)freefunc); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_insert(STACK_OF(t1) *sk, t2 *ptr, int idx) \ + { \ + return OPENSSL_sk_insert((OPENSSL_STACK *)sk, (const void *)ptr, idx); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_set(STACK_OF(t1) *sk, int idx, t2 *ptr) \ + { \ + return (t2 *)OPENSSL_sk_set((OPENSSL_STACK *)sk, idx, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_find((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find_ex(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_find_ex((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find_all(STACK_OF(t1) *sk, t2 *ptr, int *pnum) \ + { \ + return OPENSSL_sk_find_all((OPENSSL_STACK *)sk, (const void *)ptr, pnum); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_sort(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_sort((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_is_sorted(const STACK_OF(t1) *sk) \ + { \ + return OPENSSL_sk_is_sorted((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) * sk_##t1##_dup(const STACK_OF(t1) *sk) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_dup((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_deep_copy(const STACK_OF(t1) *sk, \ + sk_##t1##_copyfunc copyfunc, \ + sk_##t1##_freefunc freefunc) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_deep_copy((const OPENSSL_STACK *)sk, \ + (OPENSSL_sk_copyfunc)copyfunc, \ + (OPENSSL_sk_freefunc)freefunc); \ + } \ + static ossl_unused ossl_inline sk_##t1##_compfunc sk_##t1##_set_cmp_func(STACK_OF(t1) *sk, sk_##t1##_compfunc compare) \ + { \ + return (sk_##t1##_compfunc)OPENSSL_sk_set_cmp_func((OPENSSL_STACK *)sk, (OPENSSL_sk_compfunc)compare); \ + } + +# define DEFINE_STACK_OF(t) SKM_DEFINE_STACK_OF(t, t, t) +# define DEFINE_STACK_OF_CONST(t) SKM_DEFINE_STACK_OF(t, const t, t) +# define DEFINE_SPECIAL_STACK_OF(t1, t2) SKM_DEFINE_STACK_OF(t1, t2, t2) +# define DEFINE_SPECIAL_STACK_OF_CONST(t1, t2) \ + SKM_DEFINE_STACK_OF(t1, const t2, t2) + +/*- + * Strings are special: normally an lhash entry will point to a single + * (somewhat) mutable object. In the case of strings: + * + * a) Instead of a single char, there is an array of chars, NUL-terminated. + * b) The string may have be immutable. + * + * So, they need their own declarations. Especially important for + * type-checking tools, such as Deputy. + * + * In practice, however, it appears to be hard to have a const + * string. For now, I'm settling for dealing with the fact it is a + * string at all. 
+ */ +typedef char *OPENSSL_STRING; +typedef const char *OPENSSL_CSTRING; + +/*- + * Confusingly, LHASH_OF(STRING) deals with char ** throughout, but + * STACK_OF(STRING) is really more like STACK_OF(char), only, as mentioned + * above, instead of a single char each entry is a NUL-terminated array of + * chars. So, we have to implement STRING specially for STACK_OF. This is + * dealt with in the autogenerated macros below. + */ +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_STRING, char, char) +#define sk_OPENSSL_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_value(sk, idx) ((char *)OPENSSL_sk_value(ossl_check_const_OPENSSL_STRING_sk_type(sk), (idx))) +#define sk_OPENSSL_STRING_new(cmp) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new(ossl_check_OPENSSL_STRING_compfunc_type(cmp))) +#define sk_OPENSSL_STRING_new_null() ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_STRING_new_reserve(cmp, n) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_STRING_compfunc_type(cmp), (n))) +#define sk_OPENSSL_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_STRING_sk_type(sk), (n)) +#define sk_OPENSSL_STRING_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_delete(sk, i) ((char *)OPENSSL_sk_delete(ossl_check_OPENSSL_STRING_sk_type(sk), (i))) +#define sk_OPENSSL_STRING_delete_ptr(sk, ptr) ((char *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr))) +#define sk_OPENSSL_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_pop(sk) ((char *)OPENSSL_sk_pop(ossl_check_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_shift(sk) ((char *)OPENSSL_sk_shift(ossl_check_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_STRING_sk_type(sk),ossl_check_OPENSSL_STRING_freefunc_type(freefunc)) +#define sk_OPENSSL_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr), (idx)) +#define sk_OPENSSL_STRING_set(sk, idx, ptr) ((char *)OPENSSL_sk_set(ossl_check_OPENSSL_STRING_sk_type(sk), (idx), ossl_check_OPENSSL_STRING_type(ptr))) +#define sk_OPENSSL_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr), pnum) +#define sk_OPENSSL_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_dup(sk) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_copyfunc_type(copyfunc), 
ossl_check_OPENSSL_STRING_freefunc_type(freefunc))) +#define sk_OPENSSL_STRING_set_cmp_func(sk, cmp) ((sk_OPENSSL_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_CSTRING, const char, char) +#define sk_OPENSSL_CSTRING_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_value(sk, idx) ((const char *)OPENSSL_sk_value(ossl_check_const_OPENSSL_CSTRING_sk_type(sk), (idx))) +#define sk_OPENSSL_CSTRING_new(cmp) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new(ossl_check_OPENSSL_CSTRING_compfunc_type(cmp))) +#define sk_OPENSSL_CSTRING_new_null() ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_CSTRING_new_reserve(cmp, n) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_CSTRING_compfunc_type(cmp), (n))) +#define sk_OPENSSL_CSTRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_CSTRING_sk_type(sk), (n)) +#define sk_OPENSSL_CSTRING_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_delete(sk, i) ((const char *)OPENSSL_sk_delete(ossl_check_OPENSSL_CSTRING_sk_type(sk), (i))) +#define sk_OPENSSL_CSTRING_delete_ptr(sk, ptr) ((const char *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr))) +#define sk_OPENSSL_CSTRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_pop(sk) ((const char *)OPENSSL_sk_pop(ossl_check_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_shift(sk) ((const char *)OPENSSL_sk_shift(ossl_check_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_CSTRING_sk_type(sk),ossl_check_OPENSSL_CSTRING_freefunc_type(freefunc)) +#define sk_OPENSSL_CSTRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr), (idx)) +#define sk_OPENSSL_CSTRING_set(sk, idx, ptr) ((const char *)OPENSSL_sk_set(ossl_check_OPENSSL_CSTRING_sk_type(sk), (idx), ossl_check_OPENSSL_CSTRING_type(ptr))) +#define sk_OPENSSL_CSTRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr), pnum) +#define sk_OPENSSL_CSTRING_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_dup(sk) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_copyfunc_type(copyfunc), ossl_check_OPENSSL_CSTRING_freefunc_type(freefunc))) +#define sk_OPENSSL_CSTRING_set_cmp_func(sk, cmp) 
((sk_OPENSSL_CSTRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_compfunc_type(cmp))) + + +#if !defined(OPENSSL_NO_DEPRECATED_3_0) +/* + * This is not used by OpenSSL. A block of bytes, NOT nul-terminated. + * These should also be distinguished from "normal" stacks. + */ +typedef void *OPENSSL_BLOCK; +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_BLOCK, void, void) +#define sk_OPENSSL_BLOCK_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_value(sk, idx) ((void *)OPENSSL_sk_value(ossl_check_const_OPENSSL_BLOCK_sk_type(sk), (idx))) +#define sk_OPENSSL_BLOCK_new(cmp) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new(ossl_check_OPENSSL_BLOCK_compfunc_type(cmp))) +#define sk_OPENSSL_BLOCK_new_null() ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_BLOCK_new_reserve(cmp, n) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_BLOCK_compfunc_type(cmp), (n))) +#define sk_OPENSSL_BLOCK_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_BLOCK_sk_type(sk), (n)) +#define sk_OPENSSL_BLOCK_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_delete(sk, i) ((void *)OPENSSL_sk_delete(ossl_check_OPENSSL_BLOCK_sk_type(sk), (i))) +#define sk_OPENSSL_BLOCK_delete_ptr(sk, ptr) ((void *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr))) +#define sk_OPENSSL_BLOCK_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_pop(sk) ((void *)OPENSSL_sk_pop(ossl_check_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_shift(sk) ((void *)OPENSSL_sk_shift(ossl_check_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_BLOCK_sk_type(sk),ossl_check_OPENSSL_BLOCK_freefunc_type(freefunc)) +#define sk_OPENSSL_BLOCK_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr), (idx)) +#define sk_OPENSSL_BLOCK_set(sk, idx, ptr) ((void *)OPENSSL_sk_set(ossl_check_OPENSSL_BLOCK_sk_type(sk), (idx), ossl_check_OPENSSL_BLOCK_type(ptr))) +#define sk_OPENSSL_BLOCK_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr), pnum) +#define sk_OPENSSL_BLOCK_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_dup(sk) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_copyfunc_type(copyfunc), ossl_check_OPENSSL_BLOCK_freefunc_type(freefunc))) +#define sk_OPENSSL_BLOCK_set_cmp_func(sk, cmp) 
((sk_OPENSSL_BLOCK_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_compfunc_type(cmp))) + +#endif + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/srp.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/srp.h new file mode 100644 index 00000000000..a48766c6ce8 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/srp.h @@ -0,0 +1,285 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/srp.h.in + * + * Copyright 2004-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2004, EdelKey Project. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * Originally written by Christophe Renou and Peter Sylvester, + * for the EdelKey project. + */ + + + +#ifndef OPENSSL_SRP_H +# define OPENSSL_SRP_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SRP_H +# endif + +#include + +#ifndef OPENSSL_NO_SRP +# include +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# ifndef OPENSSL_NO_DEPRECATED_3_0 + +typedef struct SRP_gN_cache_st { + char *b64_bn; + BIGNUM *bn; +} SRP_gN_cache; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_gN_cache, SRP_gN_cache, SRP_gN_cache) +#define sk_SRP_gN_cache_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_value(sk, idx) ((SRP_gN_cache *)OPENSSL_sk_value(ossl_check_const_SRP_gN_cache_sk_type(sk), (idx))) +#define sk_SRP_gN_cache_new(cmp) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new(ossl_check_SRP_gN_cache_compfunc_type(cmp))) +#define sk_SRP_gN_cache_new_null() ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new_null()) +#define sk_SRP_gN_cache_new_reserve(cmp, n) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new_reserve(ossl_check_SRP_gN_cache_compfunc_type(cmp), (n))) +#define sk_SRP_gN_cache_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_gN_cache_sk_type(sk), (n)) +#define sk_SRP_gN_cache_free(sk) OPENSSL_sk_free(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_delete(sk, i) ((SRP_gN_cache *)OPENSSL_sk_delete(ossl_check_SRP_gN_cache_sk_type(sk), (i))) +#define sk_SRP_gN_cache_delete_ptr(sk, ptr) ((SRP_gN_cache *)OPENSSL_sk_delete_ptr(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr))) +#define sk_SRP_gN_cache_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_pop(sk) ((SRP_gN_cache *)OPENSSL_sk_pop(ossl_check_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_shift(sk) ((SRP_gN_cache *)OPENSSL_sk_shift(ossl_check_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_gN_cache_sk_type(sk),ossl_check_SRP_gN_cache_freefunc_type(freefunc)) +#define sk_SRP_gN_cache_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr), (idx)) +#define sk_SRP_gN_cache_set(sk, idx, ptr) ((SRP_gN_cache *)OPENSSL_sk_set(ossl_check_SRP_gN_cache_sk_type(sk), (idx), 
ossl_check_SRP_gN_cache_type(ptr))) +#define sk_SRP_gN_cache_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr), pnum) +#define sk_SRP_gN_cache_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_dup(sk) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_dup(ossl_check_const_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_copyfunc_type(copyfunc), ossl_check_SRP_gN_cache_freefunc_type(freefunc))) +#define sk_SRP_gN_cache_set_cmp_func(sk, cmp) ((sk_SRP_gN_cache_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_compfunc_type(cmp))) + + + +typedef struct SRP_user_pwd_st { + /* Owned by us. */ + char *id; + BIGNUM *s; + BIGNUM *v; + /* Not owned by us. */ + const BIGNUM *g; + const BIGNUM *N; + /* Owned by us. */ + char *info; +} SRP_user_pwd; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_user_pwd, SRP_user_pwd, SRP_user_pwd) +#define sk_SRP_user_pwd_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_value(sk, idx) ((SRP_user_pwd *)OPENSSL_sk_value(ossl_check_const_SRP_user_pwd_sk_type(sk), (idx))) +#define sk_SRP_user_pwd_new(cmp) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new(ossl_check_SRP_user_pwd_compfunc_type(cmp))) +#define sk_SRP_user_pwd_new_null() ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new_null()) +#define sk_SRP_user_pwd_new_reserve(cmp, n) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new_reserve(ossl_check_SRP_user_pwd_compfunc_type(cmp), (n))) +#define sk_SRP_user_pwd_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_user_pwd_sk_type(sk), (n)) +#define sk_SRP_user_pwd_free(sk) OPENSSL_sk_free(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_delete(sk, i) ((SRP_user_pwd *)OPENSSL_sk_delete(ossl_check_SRP_user_pwd_sk_type(sk), (i))) +#define sk_SRP_user_pwd_delete_ptr(sk, ptr) ((SRP_user_pwd *)OPENSSL_sk_delete_ptr(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr))) +#define sk_SRP_user_pwd_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_pop(sk) ((SRP_user_pwd *)OPENSSL_sk_pop(ossl_check_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_shift(sk) ((SRP_user_pwd *)OPENSSL_sk_shift(ossl_check_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_user_pwd_sk_type(sk),ossl_check_SRP_user_pwd_freefunc_type(freefunc)) +#define sk_SRP_user_pwd_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr), (idx)) +#define sk_SRP_user_pwd_set(sk, idx, ptr) ((SRP_user_pwd *)OPENSSL_sk_set(ossl_check_SRP_user_pwd_sk_type(sk), (idx), ossl_check_SRP_user_pwd_type(ptr))) +#define sk_SRP_user_pwd_find(sk, ptr) 
OPENSSL_sk_find(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr), pnum) +#define sk_SRP_user_pwd_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_dup(sk) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_dup(ossl_check_const_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_copyfunc_type(copyfunc), ossl_check_SRP_user_pwd_freefunc_type(freefunc))) +#define sk_SRP_user_pwd_set_cmp_func(sk, cmp) ((sk_SRP_user_pwd_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_compfunc_type(cmp))) + + +OSSL_DEPRECATEDIN_3_0 +SRP_user_pwd *SRP_user_pwd_new(void); +OSSL_DEPRECATEDIN_3_0 +void SRP_user_pwd_free(SRP_user_pwd *user_pwd); + +OSSL_DEPRECATEDIN_3_0 +void SRP_user_pwd_set_gN(SRP_user_pwd *user_pwd, const BIGNUM *g, + const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +int SRP_user_pwd_set1_ids(SRP_user_pwd *user_pwd, const char *id, + const char *info); +OSSL_DEPRECATEDIN_3_0 +int SRP_user_pwd_set0_sv(SRP_user_pwd *user_pwd, BIGNUM *s, BIGNUM *v); + +typedef struct SRP_VBASE_st { + STACK_OF(SRP_user_pwd) *users_pwd; + STACK_OF(SRP_gN_cache) *gN_cache; +/* to simulate a user */ + char *seed_key; + const BIGNUM *default_g; + const BIGNUM *default_N; +} SRP_VBASE; + +/* + * Internal structure storing N and g pair + */ +typedef struct SRP_gN_st { + char *id; + const BIGNUM *g; + const BIGNUM *N; +} SRP_gN; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_gN, SRP_gN, SRP_gN) +#define sk_SRP_gN_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_value(sk, idx) ((SRP_gN *)OPENSSL_sk_value(ossl_check_const_SRP_gN_sk_type(sk), (idx))) +#define sk_SRP_gN_new(cmp) ((STACK_OF(SRP_gN) *)OPENSSL_sk_new(ossl_check_SRP_gN_compfunc_type(cmp))) +#define sk_SRP_gN_new_null() ((STACK_OF(SRP_gN) *)OPENSSL_sk_new_null()) +#define sk_SRP_gN_new_reserve(cmp, n) ((STACK_OF(SRP_gN) *)OPENSSL_sk_new_reserve(ossl_check_SRP_gN_compfunc_type(cmp), (n))) +#define sk_SRP_gN_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_gN_sk_type(sk), (n)) +#define sk_SRP_gN_free(sk) OPENSSL_sk_free(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_delete(sk, i) ((SRP_gN *)OPENSSL_sk_delete(ossl_check_SRP_gN_sk_type(sk), (i))) +#define sk_SRP_gN_delete_ptr(sk, ptr) ((SRP_gN *)OPENSSL_sk_delete_ptr(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr))) +#define sk_SRP_gN_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_pop(sk) ((SRP_gN *)OPENSSL_sk_pop(ossl_check_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_shift(sk) ((SRP_gN *)OPENSSL_sk_shift(ossl_check_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_gN_sk_type(sk),ossl_check_SRP_gN_freefunc_type(freefunc)) +#define sk_SRP_gN_insert(sk, ptr, idx) 
OPENSSL_sk_insert(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr), (idx)) +#define sk_SRP_gN_set(sk, idx, ptr) ((SRP_gN *)OPENSSL_sk_set(ossl_check_SRP_gN_sk_type(sk), (idx), ossl_check_SRP_gN_type(ptr))) +#define sk_SRP_gN_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr), pnum) +#define sk_SRP_gN_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_dup(sk) ((STACK_OF(SRP_gN) *)OPENSSL_sk_dup(ossl_check_const_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_gN) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_gN_sk_type(sk), ossl_check_SRP_gN_copyfunc_type(copyfunc), ossl_check_SRP_gN_freefunc_type(freefunc))) +#define sk_SRP_gN_set_cmp_func(sk, cmp) ((sk_SRP_gN_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_compfunc_type(cmp))) + + + +OSSL_DEPRECATEDIN_3_0 +SRP_VBASE *SRP_VBASE_new(char *seed_key); +OSSL_DEPRECATEDIN_3_0 +void SRP_VBASE_free(SRP_VBASE *vb); +OSSL_DEPRECATEDIN_3_0 +int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file); + +OSSL_DEPRECATEDIN_3_0 +int SRP_VBASE_add0_user(SRP_VBASE *vb, SRP_user_pwd *user_pwd); + +/* NOTE: unlike in SRP_VBASE_get_by_user, caller owns the returned pointer.*/ +OSSL_DEPRECATEDIN_3_0 +SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username); + +OSSL_DEPRECATEDIN_3_0 +char *SRP_create_verifier_ex(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +char *SRP_create_verifier(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g); +OSSL_DEPRECATEDIN_3_0 +int SRP_create_verifier_BN_ex(const char *user, const char *pass, BIGNUM **salt, + BIGNUM **verifier, const BIGNUM *N, + const BIGNUM *g, OSSL_LIB_CTX *libctx, + const char *propq); +OSSL_DEPRECATEDIN_3_0 +int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, + BIGNUM **verifier, const BIGNUM *N, + const BIGNUM *g); + +# define SRP_NO_ERROR 0 +# define SRP_ERR_VBASE_INCOMPLETE_FILE 1 +# define SRP_ERR_VBASE_BN_LIB 2 +# define SRP_ERR_OPEN_FILE 3 +# define SRP_ERR_MEMORY 4 + +# define DB_srptype 0 +# define DB_srpverifier 1 +# define DB_srpsalt 2 +# define DB_srpid 3 +# define DB_srpgN 4 +# define DB_srpinfo 5 +# undef DB_NUMBER +# define DB_NUMBER 6 + +# define DB_SRP_INDEX 'I' +# define DB_SRP_VALID 'V' +# define DB_SRP_REVOKED 'R' +# define DB_SRP_MODIF 'v' + +/* see srp.c */ +OSSL_DEPRECATEDIN_3_0 +char *SRP_check_known_gN_param(const BIGNUM *g, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +SRP_gN *SRP_get_default_gN(const char *id); + +/* server side .... 
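+ *
+ * (A hedged sketch of the deprecated server-side flow using the
+ * declarations below; b is the server's random secret, A is the value
+ * received from the client, and every variable name here is
+ * illustrative rather than part of this header:
+ *
+ *     BIGNUM *B = SRP_Calc_B(b, N, g, v);              then send B
+ *     BIGNUM *u = SRP_Calc_u(A, B, N);
+ *     BIGNUM *S = SRP_Calc_server_key(A, v, u, b, N);  shared secret
+ * )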
*/ +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_server_key(const BIGNUM *A, const BIGNUM *v, const BIGNUM *u, + const BIGNUM *b, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_B_ex(const BIGNUM *b, const BIGNUM *N, const BIGNUM *g, + const BIGNUM *v, OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_B(const BIGNUM *b, const BIGNUM *N, const BIGNUM *g, + const BIGNUM *v); + +OSSL_DEPRECATEDIN_3_0 +int SRP_Verify_A_mod_N(const BIGNUM *A, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_u_ex(const BIGNUM *A, const BIGNUM *B, const BIGNUM *N, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_u(const BIGNUM *A, const BIGNUM *B, const BIGNUM *N); + +/* client side .... */ + +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_x_ex(const BIGNUM *s, const char *user, const char *pass, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_x(const BIGNUM *s, const char *user, const char *pass); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_A(const BIGNUM *a, const BIGNUM *N, const BIGNUM *g); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_client_key_ex(const BIGNUM *N, const BIGNUM *B, const BIGNUM *g, + const BIGNUM *x, const BIGNUM *a, const BIGNUM *u, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_client_key(const BIGNUM *N, const BIGNUM *B, const BIGNUM *g, + const BIGNUM *x, const BIGNUM *a, const BIGNUM *u); +OSSL_DEPRECATEDIN_3_0 +int SRP_Verify_B_mod_N(const BIGNUM *B, const BIGNUM *N); + +# define SRP_MINIMAL_N 1024 + +# endif /* OPENSSL_NO_DEPRECATED_3_0 */ + +/* This method ignores the configured seed and fails for an unknown user. */ +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 +SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username); +# endif + +# ifdef __cplusplus +} +# endif +# endif + +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ssl.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ssl.h new file mode 100644 index 00000000000..2b43485f725 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ssl.h @@ -0,0 +1,2765 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ssl.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved + * Copyright 2005 Nokia. All rights reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_SSL_H +# define OPENSSL_SSL_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SSL_H +# endif + +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# include +# include +# endif +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* OpenSSL version number for ASN.1 encoding of the session information */ +/*- + * Version 0 - initial version + * Version 1 - added the optional peer certificate + */ +# define SSL_SESSION_ASN1_VERSION 0x0001 + +# define SSL_MAX_SSL_SESSION_ID_LENGTH 32 +# define SSL_MAX_SID_CTX_LENGTH 32 + +# define SSL_MIN_RSA_MODULUS_LENGTH_IN_BYTES (512/8) +# define SSL_MAX_KEY_ARG_LENGTH 8 +/* SSL_MAX_MASTER_KEY_LENGTH is defined in prov_ssl.h */ + +/* The maximum number of encrypt/decrypt pipelines we can support */ +# define SSL_MAX_PIPELINES 32 + +/* text strings for the ciphers */ + +/* These are used to specify which ciphers to use and not to use */ + +# define SSL_TXT_LOW "LOW" +# define SSL_TXT_MEDIUM "MEDIUM" +# define SSL_TXT_HIGH "HIGH" +# define SSL_TXT_FIPS "FIPS" + +# define SSL_TXT_aNULL "aNULL" +# define SSL_TXT_eNULL "eNULL" +# define SSL_TXT_NULL "NULL" + +# define SSL_TXT_kRSA "kRSA" +# define SSL_TXT_kDHr "kDHr"/* this cipher class has been removed */ +# define SSL_TXT_kDHd "kDHd"/* this cipher class has been removed */ +# define SSL_TXT_kDH "kDH"/* this cipher class has been removed */ +# define SSL_TXT_kEDH "kEDH"/* alias for kDHE */ +# define SSL_TXT_kDHE "kDHE" +# define SSL_TXT_kECDHr "kECDHr"/* this cipher class has been removed */ +# define SSL_TXT_kECDHe "kECDHe"/* this cipher class has been removed */ +# define SSL_TXT_kECDH "kECDH"/* this cipher class has been removed */ +# define SSL_TXT_kEECDH "kEECDH"/* alias for kECDHE */ +# define SSL_TXT_kECDHE "kECDHE" +# define SSL_TXT_kPSK "kPSK" +# define SSL_TXT_kRSAPSK "kRSAPSK" +# define SSL_TXT_kECDHEPSK "kECDHEPSK" +# define SSL_TXT_kDHEPSK "kDHEPSK" +# define SSL_TXT_kGOST "kGOST" +# define SSL_TXT_kGOST18 "kGOST18" +# define SSL_TXT_kSRP "kSRP" + +# define SSL_TXT_aRSA "aRSA" +# define SSL_TXT_aDSS "aDSS" +# define SSL_TXT_aDH "aDH"/* this cipher class has been removed */ +# define SSL_TXT_aECDH "aECDH"/* this cipher class has been removed */ +# define SSL_TXT_aECDSA "aECDSA" +# define SSL_TXT_aPSK "aPSK" +# define SSL_TXT_aGOST94 "aGOST94" +# define SSL_TXT_aGOST01 "aGOST01" +# define SSL_TXT_aGOST12 "aGOST12" +# define SSL_TXT_aGOST "aGOST" +# define SSL_TXT_aSRP "aSRP" + +# define SSL_TXT_DSS "DSS" +# define SSL_TXT_DH "DH" +# define SSL_TXT_DHE "DHE"/* same as "kDHE:-ADH" */ +# define SSL_TXT_EDH "EDH"/* alias for DHE */ +# define SSL_TXT_ADH "ADH" +# define SSL_TXT_RSA "RSA" +# define SSL_TXT_ECDH "ECDH" +# define SSL_TXT_EECDH "EECDH"/* alias for ECDHE" */ +# define SSL_TXT_ECDHE "ECDHE"/* same as "kECDHE:-AECDH" */ +# define SSL_TXT_AECDH "AECDH" +# define SSL_TXT_ECDSA "ECDSA" +# define SSL_TXT_PSK "PSK" +# define SSL_TXT_SRP "SRP" + +# define SSL_TXT_DES "DES" +# define SSL_TXT_3DES "3DES" +# define SSL_TXT_RC4 "RC4" +# define SSL_TXT_RC2 "RC2" +# define SSL_TXT_IDEA "IDEA" +# define SSL_TXT_SEED "SEED" +# define SSL_TXT_AES128 "AES128" +# define SSL_TXT_AES256 "AES256" +# define SSL_TXT_AES "AES" +# define 
SSL_TXT_AES_GCM "AESGCM" +# define SSL_TXT_AES_CCM "AESCCM" +# define SSL_TXT_AES_CCM_8 "AESCCM8" +# define SSL_TXT_CAMELLIA128 "CAMELLIA128" +# define SSL_TXT_CAMELLIA256 "CAMELLIA256" +# define SSL_TXT_CAMELLIA "CAMELLIA" +# define SSL_TXT_CHACHA20 "CHACHA20" +# define SSL_TXT_GOST "GOST89" +# define SSL_TXT_ARIA "ARIA" +# define SSL_TXT_ARIA_GCM "ARIAGCM" +# define SSL_TXT_ARIA128 "ARIA128" +# define SSL_TXT_ARIA256 "ARIA256" +# define SSL_TXT_GOST2012_GOST8912_GOST8912 "GOST2012-GOST8912-GOST8912" +# define SSL_TXT_CBC "CBC" + +# define SSL_TXT_MD5 "MD5" +# define SSL_TXT_SHA1 "SHA1" +# define SSL_TXT_SHA "SHA"/* same as "SHA1" */ +# define SSL_TXT_GOST94 "GOST94" +# define SSL_TXT_GOST89MAC "GOST89MAC" +# define SSL_TXT_GOST12 "GOST12" +# define SSL_TXT_GOST89MAC12 "GOST89MAC12" +# define SSL_TXT_SHA256 "SHA256" +# define SSL_TXT_SHA384 "SHA384" + +# define SSL_TXT_SSLV3 "SSLv3" +# define SSL_TXT_TLSV1 "TLSv1" +# define SSL_TXT_TLSV1_1 "TLSv1.1" +# define SSL_TXT_TLSV1_2 "TLSv1.2" + +# define SSL_TXT_ALL "ALL" + +/*- + * COMPLEMENTOF* definitions. These identifiers are used to (de-select) + * ciphers normally not being used. + * Example: "RC4" will activate all ciphers using RC4 including ciphers + * without authentication, which would normally disabled by DEFAULT (due + * the "!ADH" being part of default). Therefore "RC4:!COMPLEMENTOFDEFAULT" + * will make sure that it is also disabled in the specific selection. + * COMPLEMENTOF* identifiers are portable between version, as adjustments + * to the default cipher setup will also be included here. + * + * COMPLEMENTOFDEFAULT does not experience the same special treatment that + * DEFAULT gets, as only selection is being done and no sorting as needed + * for DEFAULT. + */ +# define SSL_TXT_CMPALL "COMPLEMENTOFALL" +# define SSL_TXT_CMPDEF "COMPLEMENTOFDEFAULT" + +/* + * The following cipher list is used by default. It also is substituted when + * an application-defined cipher list string starts with 'DEFAULT'. + * This applies to ciphersuites for TLSv1.2 and below. + * DEPRECATED IN 3.0.0, in favor of OSSL_default_cipher_list() + * Update both macro and function simultaneously + */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_DEFAULT_CIPHER_LIST "ALL:!COMPLEMENTOFDEFAULT:!eNULL" +/* + * This is the default set of TLSv1.3 ciphersuites + * DEPRECATED IN 3.0.0, in favor of OSSL_default_ciphersuites() + * Update both macro and function simultaneously + */ +# define TLS_DEFAULT_CIPHERSUITES "TLS_AES_256_GCM_SHA384:" \ + "TLS_CHACHA20_POLY1305_SHA256:" \ + "TLS_AES_128_GCM_SHA256" +# endif +/* + * As of OpenSSL 1.0.0, ssl_create_cipher_list() in ssl/ssl_ciph.c always + * starts with a reasonable order, and all we have to do for DEFAULT is + * throwing out anonymous and unencrypted ciphersuites! (The latter are not + * actually enabled by ALL, but "ALL:RSA" would enable some of them.) + */ + +/* Used in SSL_set_shutdown()/SSL_get_shutdown(); */ +# define SSL_SENT_SHUTDOWN 1 +# define SSL_RECEIVED_SHUTDOWN 2 + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define SSL_FILETYPE_ASN1 X509_FILETYPE_ASN1 +# define SSL_FILETYPE_PEM X509_FILETYPE_PEM + +/* + * This is needed to stop compilers complaining about the 'struct ssl_st *' + * function parameters used to prototype callbacks in SSL_CTX. 
+ */ +typedef struct ssl_st *ssl_crock_st; +typedef struct tls_session_ticket_ext_st TLS_SESSION_TICKET_EXT; +typedef struct ssl_method_st SSL_METHOD; +typedef struct ssl_cipher_st SSL_CIPHER; +typedef struct ssl_session_st SSL_SESSION; +typedef struct tls_sigalgs_st TLS_SIGALGS; +typedef struct ssl_conf_ctx_st SSL_CONF_CTX; +typedef struct ssl_comp_st SSL_COMP; + +STACK_OF(SSL_CIPHER); +STACK_OF(SSL_COMP); + +/* SRTP protection profiles for use with the use_srtp extension (RFC 5764)*/ +typedef struct srtp_protection_profile_st { + const char *name; + unsigned long id; +} SRTP_PROTECTION_PROFILE; +SKM_DEFINE_STACK_OF_INTERNAL(SRTP_PROTECTION_PROFILE, SRTP_PROTECTION_PROFILE, SRTP_PROTECTION_PROFILE) +#define sk_SRTP_PROTECTION_PROFILE_num(sk) OPENSSL_sk_num(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_value(sk, idx) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_value(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk), (idx))) +#define sk_SRTP_PROTECTION_PROFILE_new(cmp) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new(ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp))) +#define sk_SRTP_PROTECTION_PROFILE_new_null() ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new_null()) +#define sk_SRTP_PROTECTION_PROFILE_new_reserve(cmp, n) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new_reserve(ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp), (n))) +#define sk_SRTP_PROTECTION_PROFILE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (n)) +#define sk_SRTP_PROTECTION_PROFILE_free(sk) OPENSSL_sk_free(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_zero(sk) OPENSSL_sk_zero(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_delete(sk, i) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_delete(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (i))) +#define sk_SRTP_PROTECTION_PROFILE_delete_ptr(sk, ptr) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_delete_ptr(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr))) +#define sk_SRTP_PROTECTION_PROFILE_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_pop(sk) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_pop(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_shift(sk) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_shift(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk),ossl_check_SRTP_PROTECTION_PROFILE_freefunc_type(freefunc)) +#define sk_SRTP_PROTECTION_PROFILE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr), (idx)) +#define sk_SRTP_PROTECTION_PROFILE_set(sk, idx, ptr) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_set(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (idx), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr))) +#define sk_SRTP_PROTECTION_PROFILE_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), 
ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr), pnum) +#define sk_SRTP_PROTECTION_PROFILE_sort(sk) OPENSSL_sk_sort(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_dup(sk) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_dup(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_deep_copy(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_copyfunc_type(copyfunc), ossl_check_SRTP_PROTECTION_PROFILE_freefunc_type(freefunc))) +#define sk_SRTP_PROTECTION_PROFILE_set_cmp_func(sk, cmp) ((sk_SRTP_PROTECTION_PROFILE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp))) + + + +typedef int (*tls_session_ticket_ext_cb_fn)(SSL *s, const unsigned char *data, + int len, void *arg); +typedef int (*tls_session_secret_cb_fn)(SSL *s, void *secret, int *secret_len, + STACK_OF(SSL_CIPHER) *peer_ciphers, + const SSL_CIPHER **cipher, void *arg); + +/* Extension context codes */ +/* This extension is only allowed in TLS */ +#define SSL_EXT_TLS_ONLY 0x00001 +/* This extension is only allowed in DTLS */ +#define SSL_EXT_DTLS_ONLY 0x00002 +/* Some extensions may be allowed in DTLS but we don't implement them for it */ +#define SSL_EXT_TLS_IMPLEMENTATION_ONLY 0x00004 +/* Most extensions are not defined for SSLv3 but EXT_TYPE_renegotiate is */ +#define SSL_EXT_SSL3_ALLOWED 0x00008 +/* Extension is only defined for TLS1.2 and below */ +#define SSL_EXT_TLS1_2_AND_BELOW_ONLY 0x00010 +/* Extension is only defined for TLS1.3 and above */ +#define SSL_EXT_TLS1_3_ONLY 0x00020 +/* Ignore this extension during parsing if we are resuming */ +#define SSL_EXT_IGNORE_ON_RESUMPTION 0x00040 +#define SSL_EXT_CLIENT_HELLO 0x00080 +/* Really means TLS1.2 or below */ +#define SSL_EXT_TLS1_2_SERVER_HELLO 0x00100 +#define SSL_EXT_TLS1_3_SERVER_HELLO 0x00200 +#define SSL_EXT_TLS1_3_ENCRYPTED_EXTENSIONS 0x00400 +#define SSL_EXT_TLS1_3_HELLO_RETRY_REQUEST 0x00800 +#define SSL_EXT_TLS1_3_CERTIFICATE 0x01000 +#define SSL_EXT_TLS1_3_NEW_SESSION_TICKET 0x02000 +#define SSL_EXT_TLS1_3_CERTIFICATE_REQUEST 0x04000 +#define SSL_EXT_TLS1_3_CERTIFICATE_COMPRESSION 0x08000 +/* When sending a raw public key in a certificate message */ +#define SSL_EXT_TLS1_3_RAW_PUBLIC_KEY 0x10000 + +/* Typedefs for handling custom extensions */ + +typedef int (*custom_ext_add_cb)(SSL *s, unsigned int ext_type, + const unsigned char **out, size_t *outlen, + int *al, void *add_arg); + +typedef void (*custom_ext_free_cb)(SSL *s, unsigned int ext_type, + const unsigned char *out, void *add_arg); + +typedef int (*custom_ext_parse_cb)(SSL *s, unsigned int ext_type, + const unsigned char *in, size_t inlen, + int *al, void *parse_arg); + + +typedef int (*SSL_custom_ext_add_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int context, + const unsigned char **out, + size_t *outlen, X509 *x, + size_t chainidx, + int *al, void *add_arg); + +typedef void (*SSL_custom_ext_free_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int context, + const unsigned char *out, + void *add_arg); + +typedef int 
(*SSL_custom_ext_parse_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int context, + const unsigned char *in, + size_t inlen, X509 *x, + size_t chainidx, + int *al, void *parse_arg); + +/* Typedef for verification callback */ +typedef int (*SSL_verify_cb)(int preverify_ok, X509_STORE_CTX *x509_ctx); + +/* Typedef for SSL async callback */ +typedef int (*SSL_async_callback_fn)(SSL *s, void *arg); + +#define SSL_OP_BIT(n) ((uint64_t)1 << (uint64_t)n) + +/* + * SSL/TLS connection options. + */ + /* Disable Extended master secret */ +# define SSL_OP_NO_EXTENDED_MASTER_SECRET SSL_OP_BIT(0) + /* Cleanse plaintext copies of data delivered to the application */ +# define SSL_OP_CLEANSE_PLAINTEXT SSL_OP_BIT(1) + /* Allow initial connection to servers that don't support RI */ +# define SSL_OP_LEGACY_SERVER_CONNECT SSL_OP_BIT(2) + /* Enable support for Kernel TLS */ +# define SSL_OP_ENABLE_KTLS SSL_OP_BIT(3) +# define SSL_OP_TLSEXT_PADDING SSL_OP_BIT(4) +# define SSL_OP_SAFARI_ECDHE_ECDSA_BUG SSL_OP_BIT(6) +# define SSL_OP_IGNORE_UNEXPECTED_EOF SSL_OP_BIT(7) +# define SSL_OP_ALLOW_CLIENT_RENEGOTIATION SSL_OP_BIT(8) +# define SSL_OP_DISABLE_TLSEXT_CA_NAMES SSL_OP_BIT(9) + /* In TLSv1.3 allow a non-(ec)dhe based kex_mode */ +# define SSL_OP_ALLOW_NO_DHE_KEX SSL_OP_BIT(10) + /* + * Disable SSL 3.0/TLS 1.0 CBC vulnerability workaround that was added + * in OpenSSL 0.9.6d. Usually (depending on the application protocol) + * the workaround is not needed. Unfortunately some broken SSL/TLS + * implementations cannot handle it at all, which is why we include it + * in SSL_OP_ALL. Added in 0.9.6e + */ +# define SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS SSL_OP_BIT(11) + /* DTLS options */ +# define SSL_OP_NO_QUERY_MTU SSL_OP_BIT(12) + /* Turn on Cookie Exchange (on relevant for servers) */ +# define SSL_OP_COOKIE_EXCHANGE SSL_OP_BIT(13) + /* Don't use RFC4507 ticket extension */ +# define SSL_OP_NO_TICKET SSL_OP_BIT(14) +# ifndef OPENSSL_NO_DTLS1_METHOD + /* + * Use Cisco's version identifier of DTLS_BAD_VER + * (only with deprecated DTLSv1_client_method()) + */ +# define SSL_OP_CISCO_ANYCONNECT SSL_OP_BIT(15) +# endif + /* As server, disallow session resumption on renegotiation */ +# define SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION SSL_OP_BIT(16) + /* Don't use compression even if supported */ +# define SSL_OP_NO_COMPRESSION SSL_OP_BIT(17) + /* Permit unsafe legacy renegotiation */ +# define SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION SSL_OP_BIT(18) + /* Disable encrypt-then-mac */ +# define SSL_OP_NO_ENCRYPT_THEN_MAC SSL_OP_BIT(19) + /* + * Enable TLSv1.3 Compatibility mode. This is on by default. A future + * version of OpenSSL may have this disabled by default. + */ +# define SSL_OP_ENABLE_MIDDLEBOX_COMPAT SSL_OP_BIT(20) + /* + * Prioritize Chacha20Poly1305 when client does. + * Modifies SSL_OP_CIPHER_SERVER_PREFERENCE + */ +# define SSL_OP_PRIORITIZE_CHACHA SSL_OP_BIT(21) + /* + * Set on servers to choose the cipher according to server's preferences. + */ +# define SSL_OP_CIPHER_SERVER_PREFERENCE SSL_OP_BIT(22) + /* + * If set, a server will allow a client to issue a SSLv3.0 version + * number as latest version supported in the premaster secret, even when + * TLSv1.0 (version 3.1) was announced in the client hello. Normally + * this is forbidden to prevent version rollback attacks. + */ +# define SSL_OP_TLS_ROLLBACK_BUG SSL_OP_BIT(23) + /* + * Switches off automatic TLSv1.3 anti-replay protection for early data. + * This is a server-side option only (no effect on the client). 
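+ *
+ * (As a hedged usage sketch: option bits such as this one are ORed into
+ * the current mask with SSL_CTX_set_options() and removed again with
+ * SSL_CTX_clear_options(); ctx below is an assumed, already initialised
+ * server-side SSL_CTX:
+ *
+ *     SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY);
+ * )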
+ */ +# define SSL_OP_NO_ANTI_REPLAY SSL_OP_BIT(24) +# define SSL_OP_NO_SSLv3 SSL_OP_BIT(25) +# define SSL_OP_NO_TLSv1 SSL_OP_BIT(26) +# define SSL_OP_NO_TLSv1_2 SSL_OP_BIT(27) +# define SSL_OP_NO_TLSv1_1 SSL_OP_BIT(28) +# define SSL_OP_NO_TLSv1_3 SSL_OP_BIT(29) +# define SSL_OP_NO_DTLSv1 SSL_OP_BIT(26) +# define SSL_OP_NO_DTLSv1_2 SSL_OP_BIT(27) + /* Disallow all renegotiation */ +# define SSL_OP_NO_RENEGOTIATION SSL_OP_BIT(30) + /* + * Make server add server-hello extension from early version of + * cryptopro draft, when GOST ciphersuite is negotiated. Required for + * interoperability with CryptoPro CSP 3.x + */ +# define SSL_OP_CRYPTOPRO_TLSEXT_BUG SSL_OP_BIT(31) +/* + * Disable RFC8879 certificate compression + * SSL_OP_NO_TX_CERTIFICATE_COMPRESSION: don't send compressed certificates, + * and ignore the extension when received. + * SSL_OP_NO_RX_CERTIFICATE_COMPRESSION: don't send the extension, and + * subsequently indicating that receiving is not supported + */ +# define SSL_OP_NO_TX_CERTIFICATE_COMPRESSION SSL_OP_BIT(32) +# define SSL_OP_NO_RX_CERTIFICATE_COMPRESSION SSL_OP_BIT(33) + /* Enable KTLS TX zerocopy on Linux */ +# define SSL_OP_ENABLE_KTLS_TX_ZEROCOPY_SENDFILE SSL_OP_BIT(34) + +/* + * Option "collections." + */ +# define SSL_OP_NO_SSL_MASK \ + ( SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1 | SSL_OP_NO_TLSv1_1 \ + | SSL_OP_NO_TLSv1_2 | SSL_OP_NO_TLSv1_3 ) +# define SSL_OP_NO_DTLS_MASK \ + ( SSL_OP_NO_DTLSv1 | SSL_OP_NO_DTLSv1_2 ) + +/* Various bug workarounds that should be rather harmless. */ +# define SSL_OP_ALL \ + ( SSL_OP_CRYPTOPRO_TLSEXT_BUG | SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS \ + | SSL_OP_TLSEXT_PADDING | SSL_OP_SAFARI_ECDHE_ECDSA_BUG ) + +/* + * OBSOLETE OPTIONS retained for compatibility + */ + +# define SSL_OP_MICROSOFT_SESS_ID_BUG 0x0 +# define SSL_OP_NETSCAPE_CHALLENGE_BUG 0x0 +# define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x0 +# define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0 +# define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x0 +# define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 +# define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x0 +# define SSL_OP_TLS_D5_BUG 0x0 +# define SSL_OP_TLS_BLOCK_PADDING_BUG 0x0 +# define SSL_OP_SINGLE_ECDH_USE 0x0 +# define SSL_OP_SINGLE_DH_USE 0x0 +# define SSL_OP_EPHEMERAL_RSA 0x0 +# define SSL_OP_NO_SSLv2 0x0 +# define SSL_OP_PKCS1_CHECK_1 0x0 +# define SSL_OP_PKCS1_CHECK_2 0x0 +# define SSL_OP_NETSCAPE_CA_DN_BUG 0x0 +# define SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG 0x0 + +/* + * Allow SSL_write(..., n) to return r with 0 < r < n (i.e. report success + * when just a single record has been written): + */ +# define SSL_MODE_ENABLE_PARTIAL_WRITE 0x00000001U +/* + * Make it possible to retry SSL_write() with changed buffer location (buffer + * contents must stay the same!); this is not the default to avoid the + * misconception that non-blocking SSL_write() behaves like non-blocking + * write(): + */ +# define SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER 0x00000002U +/* + * Never bother the application with retries if the transport is blocking: + */ +# define SSL_MODE_AUTO_RETRY 0x00000004U +/* Don't attempt to automatically build certificate chain */ +# define SSL_MODE_NO_AUTO_CHAIN 0x00000008U +/* + * Save RAM by releasing read and write buffers when they're empty. (SSL3 and + * TLS only.) Released buffers are freed. + */ +# define SSL_MODE_RELEASE_BUFFERS 0x00000010U +/* + * Send the current time in the Random fields of the ClientHello and + * ServerHello records for compatibility with hypothetical implementations + * that require it. 
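+ *
+ * (Mode bits in this block are enabled through the SSL_CTX_ctrl()-based
+ * macro defined further below; a hedged sketch, with ctx assumed to be
+ * an initialised SSL_CTX:
+ *
+ *     SSL_CTX_set_mode(ctx, SSL_MODE_ENABLE_PARTIAL_WRITE
+ *                           | SSL_MODE_AUTO_RETRY);
+ * )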
+ */ +# define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020U +# define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040U +/* + * Send TLS_FALLBACK_SCSV in the ClientHello. To be set only by applications + * that reconnect with a downgraded protocol version; see + * draft-ietf-tls-downgrade-scsv-00 for details. DO NOT ENABLE THIS if your + * application attempts a normal handshake. Only use this in explicit + * fallback retries, following the guidance in + * draft-ietf-tls-downgrade-scsv-00. + */ +# define SSL_MODE_SEND_FALLBACK_SCSV 0x00000080U +/* + * Support Asynchronous operation + */ +# define SSL_MODE_ASYNC 0x00000100U + +/* + * When using DTLS/SCTP, include the terminating zero in the label + * used for computing the endpoint-pair shared secret. Required for + * interoperability with implementations having this bug like these + * older version of OpenSSL: + * - OpenSSL 1.0.0 series + * - OpenSSL 1.0.1 series + * - OpenSSL 1.0.2 series + * - OpenSSL 1.1.0 series + * - OpenSSL 1.1.1 and 1.1.1a + */ +# define SSL_MODE_DTLS_SCTP_LABEL_LENGTH_BUG 0x00000400U + +/* Cert related flags */ +/* + * Many implementations ignore some aspects of the TLS standards such as + * enforcing certificate chain algorithms. When this is set we enforce them. + */ +# define SSL_CERT_FLAG_TLS_STRICT 0x00000001U + +/* Suite B modes, takes same values as certificate verify flags */ +# define SSL_CERT_FLAG_SUITEB_128_LOS_ONLY 0x10000 +/* Suite B 192 bit only mode */ +# define SSL_CERT_FLAG_SUITEB_192_LOS 0x20000 +/* Suite B 128 bit mode allowing 192 bit algorithms */ +# define SSL_CERT_FLAG_SUITEB_128_LOS 0x30000 + +/* Perform all sorts of protocol violations for testing purposes */ +# define SSL_CERT_FLAG_BROKEN_PROTOCOL 0x10000000 + +/* Flags for building certificate chains */ +/* Treat any existing certificates as untrusted CAs */ +# define SSL_BUILD_CHAIN_FLAG_UNTRUSTED 0x1 +/* Don't include root CA in chain */ +# define SSL_BUILD_CHAIN_FLAG_NO_ROOT 0x2 +/* Just check certificates already there */ +# define SSL_BUILD_CHAIN_FLAG_CHECK 0x4 +/* Ignore verification errors */ +# define SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR 0x8 +/* Clear verification errors from queue */ +# define SSL_BUILD_CHAIN_FLAG_CLEAR_ERROR 0x10 + +/* Flags returned by SSL_check_chain */ +/* Certificate can be used with this session */ +# define CERT_PKEY_VALID 0x1 +/* Certificate can also be used for signing */ +# define CERT_PKEY_SIGN 0x2 +/* EE certificate signing algorithm OK */ +# define CERT_PKEY_EE_SIGNATURE 0x10 +/* CA signature algorithms OK */ +# define CERT_PKEY_CA_SIGNATURE 0x20 +/* EE certificate parameters OK */ +# define CERT_PKEY_EE_PARAM 0x40 +/* CA certificate parameters OK */ +# define CERT_PKEY_CA_PARAM 0x80 +/* Signing explicitly allowed as opposed to SHA1 fallback */ +# define CERT_PKEY_EXPLICIT_SIGN 0x100 +/* Client CA issuer names match (always set for server cert) */ +# define CERT_PKEY_ISSUER_NAME 0x200 +/* Cert type matches client types (always set for server cert) */ +# define CERT_PKEY_CERT_TYPE 0x400 +/* Cert chain suitable to Suite B */ +# define CERT_PKEY_SUITEB 0x800 +/* Cert pkey valid for raw public key use */ +# define CERT_PKEY_RPK 0x1000 + +# define SSL_CONF_FLAG_CMDLINE 0x1 +# define SSL_CONF_FLAG_FILE 0x2 +# define SSL_CONF_FLAG_CLIENT 0x4 +# define SSL_CONF_FLAG_SERVER 0x8 +# define SSL_CONF_FLAG_SHOW_ERRORS 0x10 +# define SSL_CONF_FLAG_CERTIFICATE 0x20 +# define SSL_CONF_FLAG_REQUIRE_PRIVATE 0x40 +/* Configuration value types */ +# define SSL_CONF_TYPE_UNKNOWN 0x0 +# define SSL_CONF_TYPE_STRING 0x1 +# define 
SSL_CONF_TYPE_FILE 0x2 +# define SSL_CONF_TYPE_DIR 0x3 +# define SSL_CONF_TYPE_NONE 0x4 +# define SSL_CONF_TYPE_STORE 0x5 + +/* Maximum length of the application-controlled segment of a a TLSv1.3 cookie */ +# define SSL_COOKIE_LENGTH 4096 + +/* + * Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value, they + * cannot be used to clear bits. + */ + +uint64_t SSL_CTX_get_options(const SSL_CTX *ctx); +uint64_t SSL_get_options(const SSL *s); +uint64_t SSL_CTX_clear_options(SSL_CTX *ctx, uint64_t op); +uint64_t SSL_clear_options(SSL *s, uint64_t op); +uint64_t SSL_CTX_set_options(SSL_CTX *ctx, uint64_t op); +uint64_t SSL_set_options(SSL *s, uint64_t op); + +# define SSL_CTX_set_mode(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_MODE,(op),NULL) +# define SSL_CTX_clear_mode(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CLEAR_MODE,(op),NULL) +# define SSL_CTX_get_mode(ctx) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_MODE,0,NULL) +# define SSL_clear_mode(ssl,op) \ + SSL_ctrl((ssl),SSL_CTRL_CLEAR_MODE,(op),NULL) +# define SSL_set_mode(ssl,op) \ + SSL_ctrl((ssl),SSL_CTRL_MODE,(op),NULL) +# define SSL_get_mode(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_MODE,0,NULL) +# define SSL_set_mtu(ssl, mtu) \ + SSL_ctrl((ssl),SSL_CTRL_SET_MTU,(mtu),NULL) +# define DTLS_set_link_mtu(ssl, mtu) \ + SSL_ctrl((ssl),DTLS_CTRL_SET_LINK_MTU,(mtu),NULL) +# define DTLS_get_link_min_mtu(ssl) \ + SSL_ctrl((ssl),DTLS_CTRL_GET_LINK_MIN_MTU,0,NULL) + +# define SSL_get_secure_renegotiation_support(ssl) \ + SSL_ctrl((ssl), SSL_CTRL_GET_RI_SUPPORT, 0, NULL) + +# define SSL_CTX_set_cert_flags(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CERT_FLAGS,(op),NULL) +# define SSL_set_cert_flags(s,op) \ + SSL_ctrl((s),SSL_CTRL_CERT_FLAGS,(op),NULL) +# define SSL_CTX_clear_cert_flags(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL) +# define SSL_clear_cert_flags(s,op) \ + SSL_ctrl((s),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL) + +void SSL_CTX_set_msg_callback(SSL_CTX *ctx, + void (*cb) (int write_p, int version, + int content_type, const void *buf, + size_t len, SSL *ssl, void *arg)); +void SSL_set_msg_callback(SSL *ssl, + void (*cb) (int write_p, int version, + int content_type, const void *buf, + size_t len, SSL *ssl, void *arg)); +# define SSL_CTX_set_msg_callback_arg(ctx, arg) SSL_CTX_ctrl((ctx), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) +# define SSL_set_msg_callback_arg(ssl, arg) SSL_ctrl((ssl), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) + +# define SSL_get_extms_support(s) \ + SSL_ctrl((s),SSL_CTRL_GET_EXTMS_SUPPORT,0,NULL) + +# ifndef OPENSSL_NO_SRP +/* see tls_srp.c */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 __owur int SSL_SRP_CTX_init(SSL *s); +OSSL_DEPRECATEDIN_3_0 __owur int SSL_CTX_SRP_CTX_init(SSL_CTX *ctx); +OSSL_DEPRECATEDIN_3_0 int SSL_SRP_CTX_free(SSL *ctx); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_SRP_CTX_free(SSL_CTX *ctx); +OSSL_DEPRECATEDIN_3_0 __owur int SSL_srp_server_param_with_username(SSL *s, + int *ad); +OSSL_DEPRECATEDIN_3_0 __owur int SRP_Calc_A_param(SSL *s); +# endif +# endif + +/* 100k max cert list */ +# define SSL_MAX_CERT_LIST_DEFAULT (1024*100) + +# define SSL_SESSION_CACHE_MAX_SIZE_DEFAULT (1024*20) + +/* + * This callback type is used inside SSL_CTX, SSL, and in the functions that + * set them. It is used to override the generation of SSL/TLS session IDs in + * a server. Return value should be zero on an error, non-zero to proceed. 
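+ * (For instance, a minimal callback might simply fill the buffer with
+ * OpenSSL's RAND_bytes(id, (int)*id_len) and return its result; this
+ * sketch is illustrative only, not a recommendation.)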
+ * Also, callbacks should themselves check if the id they generate is unique + * otherwise the SSL handshake will fail with an error - callbacks can do + * this using the 'ssl' value they're passed by; + * SSL_has_matching_session_id(ssl, id, *id_len) The length value passed in + * is set at the maximum size the session ID can be. In SSLv3/TLSv1 it is 32 + * bytes. The callback can alter this length to be less if desired. It is + * also an error for the callback to set the size to zero. + */ +typedef int (*GEN_SESSION_CB) (SSL *ssl, unsigned char *id, + unsigned int *id_len); + +# define SSL_SESS_CACHE_OFF 0x0000 +# define SSL_SESS_CACHE_CLIENT 0x0001 +# define SSL_SESS_CACHE_SERVER 0x0002 +# define SSL_SESS_CACHE_BOTH (SSL_SESS_CACHE_CLIENT|SSL_SESS_CACHE_SERVER) +# define SSL_SESS_CACHE_NO_AUTO_CLEAR 0x0080 +/* enough comments already ... see SSL_CTX_set_session_cache_mode(3) */ +# define SSL_SESS_CACHE_NO_INTERNAL_LOOKUP 0x0100 +# define SSL_SESS_CACHE_NO_INTERNAL_STORE 0x0200 +# define SSL_SESS_CACHE_NO_INTERNAL \ + (SSL_SESS_CACHE_NO_INTERNAL_LOOKUP|SSL_SESS_CACHE_NO_INTERNAL_STORE) +# define SSL_SESS_CACHE_UPDATE_TIME 0x0400 + +LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(SSL_CTX *ctx); +# define SSL_CTX_sess_number(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_NUMBER,0,NULL) +# define SSL_CTX_sess_connect(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT,0,NULL) +# define SSL_CTX_sess_connect_good(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT_GOOD,0,NULL) +# define SSL_CTX_sess_connect_renegotiate(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT_RENEGOTIATE,0,NULL) +# define SSL_CTX_sess_accept(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT,0,NULL) +# define SSL_CTX_sess_accept_renegotiate(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT_RENEGOTIATE,0,NULL) +# define SSL_CTX_sess_accept_good(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT_GOOD,0,NULL) +# define SSL_CTX_sess_hits(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_HIT,0,NULL) +# define SSL_CTX_sess_cb_hits(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CB_HIT,0,NULL) +# define SSL_CTX_sess_misses(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_MISSES,0,NULL) +# define SSL_CTX_sess_timeouts(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_TIMEOUTS,0,NULL) +# define SSL_CTX_sess_cache_full(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CACHE_FULL,0,NULL) + +void SSL_CTX_sess_set_new_cb(SSL_CTX *ctx, + int (*new_session_cb) (struct ssl_st *ssl, + SSL_SESSION *sess)); +int (*SSL_CTX_sess_get_new_cb(SSL_CTX *ctx)) (struct ssl_st *ssl, + SSL_SESSION *sess); +void SSL_CTX_sess_set_remove_cb(SSL_CTX *ctx, + void (*remove_session_cb) (struct ssl_ctx_st + *ctx, + SSL_SESSION *sess)); +void (*SSL_CTX_sess_get_remove_cb(SSL_CTX *ctx)) (struct ssl_ctx_st *ctx, + SSL_SESSION *sess); +void SSL_CTX_sess_set_get_cb(SSL_CTX *ctx, + SSL_SESSION *(*get_session_cb) (struct ssl_st + *ssl, + const unsigned char + *data, int len, + int *copy)); +SSL_SESSION *(*SSL_CTX_sess_get_get_cb(SSL_CTX *ctx)) (struct ssl_st *ssl, + const unsigned char *data, + int len, int *copy); +void SSL_CTX_set_info_callback(SSL_CTX *ctx, + void (*cb) (const SSL *ssl, int type, int val)); +void (*SSL_CTX_get_info_callback(SSL_CTX *ctx)) (const SSL *ssl, int type, + int val); +void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, + int (*client_cert_cb) (SSL *ssl, X509 **x509, + EVP_PKEY **pkey)); +int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx)) (SSL *ssl, X509 **x509, + EVP_PKEY **pkey); +# ifndef OPENSSL_NO_ENGINE +__owur int SSL_CTX_set_client_cert_engine(SSL_CTX *ctx, ENGINE *e); +# endif +void 
SSL_CTX_set_cookie_generate_cb(SSL_CTX *ctx, + int (*app_gen_cookie_cb) (SSL *ssl, + unsigned char + *cookie, + unsigned int + *cookie_len)); +void SSL_CTX_set_cookie_verify_cb(SSL_CTX *ctx, + int (*app_verify_cookie_cb) (SSL *ssl, + const unsigned + char *cookie, + unsigned int + cookie_len)); + +void SSL_CTX_set_stateless_cookie_generate_cb( + SSL_CTX *ctx, + int (*gen_stateless_cookie_cb) (SSL *ssl, + unsigned char *cookie, + size_t *cookie_len)); +void SSL_CTX_set_stateless_cookie_verify_cb( + SSL_CTX *ctx, + int (*verify_stateless_cookie_cb) (SSL *ssl, + const unsigned char *cookie, + size_t cookie_len)); +# ifndef OPENSSL_NO_NEXTPROTONEG + +typedef int (*SSL_CTX_npn_advertised_cb_func)(SSL *ssl, + const unsigned char **out, + unsigned int *outlen, + void *arg); +void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s, + SSL_CTX_npn_advertised_cb_func cb, + void *arg); +# define SSL_CTX_set_npn_advertised_cb SSL_CTX_set_next_protos_advertised_cb + +typedef int (*SSL_CTX_npn_select_cb_func)(SSL *s, + unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg); +void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s, + SSL_CTX_npn_select_cb_func cb, + void *arg); +# define SSL_CTX_set_npn_select_cb SSL_CTX_set_next_proto_select_cb + +void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, + unsigned *len); +# define SSL_get0_npn_negotiated SSL_get0_next_proto_negotiated +# endif + +__owur int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, + const unsigned char *in, unsigned int inlen, + const unsigned char *client, + unsigned int client_len); + +# define OPENSSL_NPN_UNSUPPORTED 0 +# define OPENSSL_NPN_NEGOTIATED 1 +# define OPENSSL_NPN_NO_OVERLAP 2 + +__owur int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char *protos, + unsigned int protos_len); +__owur int SSL_set_alpn_protos(SSL *ssl, const unsigned char *protos, + unsigned int protos_len); +typedef int (*SSL_CTX_alpn_select_cb_func)(SSL *ssl, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg); +void SSL_CTX_set_alpn_select_cb(SSL_CTX *ctx, + SSL_CTX_alpn_select_cb_func cb, + void *arg); +void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data, + unsigned int *len); + +# ifndef OPENSSL_NO_PSK +/* + * the maximum length of the buffer given to callbacks containing the + * resulting identity/psk + */ +# define PSK_MAX_IDENTITY_LEN 256 +# define PSK_MAX_PSK_LEN 512 +typedef unsigned int (*SSL_psk_client_cb_func)(SSL *ssl, + const char *hint, + char *identity, + unsigned int max_identity_len, + unsigned char *psk, + unsigned int max_psk_len); +void SSL_CTX_set_psk_client_callback(SSL_CTX *ctx, SSL_psk_client_cb_func cb); +void SSL_set_psk_client_callback(SSL *ssl, SSL_psk_client_cb_func cb); + +typedef unsigned int (*SSL_psk_server_cb_func)(SSL *ssl, + const char *identity, + unsigned char *psk, + unsigned int max_psk_len); +void SSL_CTX_set_psk_server_callback(SSL_CTX *ctx, SSL_psk_server_cb_func cb); +void SSL_set_psk_server_callback(SSL *ssl, SSL_psk_server_cb_func cb); + +__owur int SSL_CTX_use_psk_identity_hint(SSL_CTX *ctx, const char *identity_hint); +__owur int SSL_use_psk_identity_hint(SSL *s, const char *identity_hint); +const char *SSL_get_psk_identity_hint(const SSL *s); +const char *SSL_get_psk_identity(const SSL *s); +# endif + +typedef int (*SSL_psk_find_session_cb_func)(SSL *ssl, + const unsigned char *identity, + size_t identity_len, + SSL_SESSION 
**sess); +typedef int (*SSL_psk_use_session_cb_func)(SSL *ssl, const EVP_MD *md, + const unsigned char **id, + size_t *idlen, + SSL_SESSION **sess); + +void SSL_set_psk_find_session_callback(SSL *s, SSL_psk_find_session_cb_func cb); +void SSL_CTX_set_psk_find_session_callback(SSL_CTX *ctx, + SSL_psk_find_session_cb_func cb); +void SSL_set_psk_use_session_callback(SSL *s, SSL_psk_use_session_cb_func cb); +void SSL_CTX_set_psk_use_session_callback(SSL_CTX *ctx, + SSL_psk_use_session_cb_func cb); + +/* Register callbacks to handle custom TLS Extensions for client or server. */ + +__owur int SSL_CTX_has_client_custom_ext(const SSL_CTX *ctx, + unsigned int ext_type); + +__owur int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, + unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + +__owur int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, + unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + +__owur int SSL_CTX_add_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + unsigned int context, + SSL_custom_ext_add_cb_ex add_cb, + SSL_custom_ext_free_cb_ex free_cb, + void *add_arg, + SSL_custom_ext_parse_cb_ex parse_cb, + void *parse_arg); + +__owur int SSL_extension_supported(unsigned int ext_type); + +# define SSL_NOTHING 1 +# define SSL_WRITING 2 +# define SSL_READING 3 +# define SSL_X509_LOOKUP 4 +# define SSL_ASYNC_PAUSED 5 +# define SSL_ASYNC_NO_JOBS 6 +# define SSL_CLIENT_HELLO_CB 7 +# define SSL_RETRY_VERIFY 8 + +/* These will only be used when doing non-blocking IO */ +# define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING) +# define SSL_want_read(s) (SSL_want(s) == SSL_READING) +# define SSL_want_write(s) (SSL_want(s) == SSL_WRITING) +# define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP) +# define SSL_want_retry_verify(s) (SSL_want(s) == SSL_RETRY_VERIFY) +# define SSL_want_async(s) (SSL_want(s) == SSL_ASYNC_PAUSED) +# define SSL_want_async_job(s) (SSL_want(s) == SSL_ASYNC_NO_JOBS) +# define SSL_want_client_hello_cb(s) (SSL_want(s) == SSL_CLIENT_HELLO_CB) + +# define SSL_MAC_FLAG_READ_MAC_STREAM 1 +# define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 +# define SSL_MAC_FLAG_READ_MAC_TLSTREE 4 +# define SSL_MAC_FLAG_WRITE_MAC_TLSTREE 8 + +/* + * A callback for logging out TLS key material. This callback should log out + * |line| followed by a newline. + */ +typedef void (*SSL_CTX_keylog_cb_func)(const SSL *ssl, const char *line); + +/* + * SSL_CTX_set_keylog_callback configures a callback to log key material. This + * is intended for debugging use with tools like Wireshark. The cb function + * should log line followed by a newline. + */ +void SSL_CTX_set_keylog_callback(SSL_CTX *ctx, SSL_CTX_keylog_cb_func cb); + +/* + * SSL_CTX_get_keylog_callback returns the callback configured by + * SSL_CTX_set_keylog_callback. 
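+ *
+ * (A hedged sketch of a callback suitable for the setter above; the
+ * function and file names are illustrative and error handling is kept
+ * minimal:
+ *
+ *     static void keylog_cb(const SSL *ssl, const char *line)
+ *     {
+ *         FILE *fp = fopen("sslkeylog.txt", "a");
+ *         if (fp != NULL) {
+ *             fprintf(fp, "%s\n", line);
+ *             fclose(fp);
+ *         }
+ *     }
+ *
+ *     SSL_CTX_set_keylog_callback(ctx, keylog_cb);
+ * )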
+
+int SSL_CTX_set_max_early_data(SSL_CTX *ctx, uint32_t max_early_data);
+uint32_t SSL_CTX_get_max_early_data(const SSL_CTX *ctx);
+int SSL_set_max_early_data(SSL *s, uint32_t max_early_data);
+uint32_t SSL_get_max_early_data(const SSL *s);
+int SSL_CTX_set_recv_max_early_data(SSL_CTX *ctx, uint32_t recv_max_early_data);
+uint32_t SSL_CTX_get_recv_max_early_data(const SSL_CTX *ctx);
+int SSL_set_recv_max_early_data(SSL *s, uint32_t recv_max_early_data);
+uint32_t SSL_get_recv_max_early_data(const SSL *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+# include <openssl/ssl2.h>
+# include <openssl/ssl3.h>
+# include <openssl/tls1.h>      /* This is mostly sslv3 with a few tweaks */
+# include <openssl/dtls1.h>     /* Datagram TLS */
+# include <openssl/srtp.h>      /* Support for the use_srtp extension */
+# include <openssl/quic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * These need to be after the above set of includes due to a compiler bug
+ * in VisualStudio 2015
+ */
+SKM_DEFINE_STACK_OF_INTERNAL(SSL_CIPHER, const SSL_CIPHER, SSL_CIPHER)
+#define sk_SSL_CIPHER_num(sk) OPENSSL_sk_num(ossl_check_const_SSL_CIPHER_sk_type(sk))
+#define sk_SSL_CIPHER_value(sk, idx) ((const SSL_CIPHER *)OPENSSL_sk_value(ossl_check_const_SSL_CIPHER_sk_type(sk), (idx)))
+#define sk_SSL_CIPHER_new(cmp) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new(ossl_check_SSL_CIPHER_compfunc_type(cmp)))
+#define sk_SSL_CIPHER_new_null() ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new_null())
+#define sk_SSL_CIPHER_new_reserve(cmp, n) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new_reserve(ossl_check_SSL_CIPHER_compfunc_type(cmp), (n)))
+#define sk_SSL_CIPHER_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SSL_CIPHER_sk_type(sk), (n))
+#define sk_SSL_CIPHER_free(sk) OPENSSL_sk_free(ossl_check_SSL_CIPHER_sk_type(sk))
+#define sk_SSL_CIPHER_zero(sk) OPENSSL_sk_zero(ossl_check_SSL_CIPHER_sk_type(sk))
+#define sk_SSL_CIPHER_delete(sk, i) ((const SSL_CIPHER *)OPENSSL_sk_delete(ossl_check_SSL_CIPHER_sk_type(sk), (i)))
+#define sk_SSL_CIPHER_delete_ptr(sk, ptr) ((const SSL_CIPHER *)OPENSSL_sk_delete_ptr(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)))
+#define sk_SSL_CIPHER_push(sk, ptr) OPENSSL_sk_push(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr))
+#define sk_SSL_CIPHER_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr))
+#define sk_SSL_CIPHER_pop(sk) ((const SSL_CIPHER *)OPENSSL_sk_pop(ossl_check_SSL_CIPHER_sk_type(sk)))
+#define sk_SSL_CIPHER_shift(sk) ((const SSL_CIPHER *)OPENSSL_sk_shift(ossl_check_SSL_CIPHER_sk_type(sk)))
+#define sk_SSL_CIPHER_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SSL_CIPHER_sk_type(sk),ossl_check_SSL_CIPHER_freefunc_type(freefunc))
+#define sk_SSL_CIPHER_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr), (idx))
+#define sk_SSL_CIPHER_set(sk, idx, ptr) ((const SSL_CIPHER *)OPENSSL_sk_set(ossl_check_SSL_CIPHER_sk_type(sk), (idx), ossl_check_SSL_CIPHER_type(ptr)))
+#define sk_SSL_CIPHER_find(sk, ptr) OPENSSL_sk_find(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr))
+#define sk_SSL_CIPHER_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr))
+#define sk_SSL_CIPHER_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr), pnum)
+#define sk_SSL_CIPHER_sort(sk) OPENSSL_sk_sort(ossl_check_SSL_CIPHER_sk_type(sk))
+#define sk_SSL_CIPHER_is_sorted(sk)
OPENSSL_sk_is_sorted(ossl_check_const_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_dup(sk) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_dup(ossl_check_const_SSL_CIPHER_sk_type(sk))) +#define sk_SSL_CIPHER_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_deep_copy(ossl_check_const_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_copyfunc_type(copyfunc), ossl_check_SSL_CIPHER_freefunc_type(freefunc))) +#define sk_SSL_CIPHER_set_cmp_func(sk, cmp) ((sk_SSL_CIPHER_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(SSL_COMP, SSL_COMP, SSL_COMP) +#define sk_SSL_COMP_num(sk) OPENSSL_sk_num(ossl_check_const_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_value(sk, idx) ((SSL_COMP *)OPENSSL_sk_value(ossl_check_const_SSL_COMP_sk_type(sk), (idx))) +#define sk_SSL_COMP_new(cmp) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new(ossl_check_SSL_COMP_compfunc_type(cmp))) +#define sk_SSL_COMP_new_null() ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new_null()) +#define sk_SSL_COMP_new_reserve(cmp, n) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new_reserve(ossl_check_SSL_COMP_compfunc_type(cmp), (n))) +#define sk_SSL_COMP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SSL_COMP_sk_type(sk), (n)) +#define sk_SSL_COMP_free(sk) OPENSSL_sk_free(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_zero(sk) OPENSSL_sk_zero(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_delete(sk, i) ((SSL_COMP *)OPENSSL_sk_delete(ossl_check_SSL_COMP_sk_type(sk), (i))) +#define sk_SSL_COMP_delete_ptr(sk, ptr) ((SSL_COMP *)OPENSSL_sk_delete_ptr(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr))) +#define sk_SSL_COMP_push(sk, ptr) OPENSSL_sk_push(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_pop(sk) ((SSL_COMP *)OPENSSL_sk_pop(ossl_check_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_shift(sk) ((SSL_COMP *)OPENSSL_sk_shift(ossl_check_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SSL_COMP_sk_type(sk),ossl_check_SSL_COMP_freefunc_type(freefunc)) +#define sk_SSL_COMP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr), (idx)) +#define sk_SSL_COMP_set(sk, idx, ptr) ((SSL_COMP *)OPENSSL_sk_set(ossl_check_SSL_COMP_sk_type(sk), (idx), ossl_check_SSL_COMP_type(ptr))) +#define sk_SSL_COMP_find(sk, ptr) OPENSSL_sk_find(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr), pnum) +#define sk_SSL_COMP_sort(sk) OPENSSL_sk_sort(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_dup(sk) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_dup(ossl_check_const_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_deep_copy(ossl_check_const_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_copyfunc_type(copyfunc), ossl_check_SSL_COMP_freefunc_type(freefunc))) +#define sk_SSL_COMP_set_cmp_func(sk, cmp) ((sk_SSL_COMP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_compfunc_type(cmp))) + + +/* 
compatibility */
+# define SSL_set_app_data(s,arg)        (SSL_set_ex_data(s,0,(char *)(arg)))
+# define SSL_get_app_data(s)            (SSL_get_ex_data(s,0))
+# define SSL_SESSION_set_app_data(s,a)  (SSL_SESSION_set_ex_data(s,0, \
+                                         (char *)(a)))
+# define SSL_SESSION_get_app_data(s)    (SSL_SESSION_get_ex_data(s,0))
+# define SSL_CTX_get_app_data(ctx)      (SSL_CTX_get_ex_data(ctx,0))
+# define SSL_CTX_set_app_data(ctx,arg)  (SSL_CTX_set_ex_data(ctx,0, \
+                                         (char *)(arg)))
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+OSSL_DEPRECATEDIN_1_1_0 void SSL_set_debug(SSL *s, int debug);
+# endif
+
+/* TLSv1.3 KeyUpdate message types */
+/* -1 used so that this is an invalid value for the on-the-wire protocol */
+#define SSL_KEY_UPDATE_NONE          -1
+/* Values as defined for the on-the-wire protocol */
+#define SSL_KEY_UPDATE_NOT_REQUESTED 0
+#define SSL_KEY_UPDATE_REQUESTED     1
+
+/*
+ * The valid handshake states (one for each type message sent and one for each
+ * type of message received). There are also two "special" states:
+ * TLS = TLS or DTLS state
+ * DTLS = DTLS specific state
+ * CR/SR = Client Read/Server Read
+ * CW/SW = Client Write/Server Write
+ *
+ * The "special" states are:
+ * TLS_ST_BEFORE = No handshake has been initiated yet
+ * TLS_ST_OK = A handshake has been successfully completed
+ */
+typedef enum {
+    TLS_ST_BEFORE,
+    TLS_ST_OK,
+    DTLS_ST_CR_HELLO_VERIFY_REQUEST,
+    TLS_ST_CR_SRVR_HELLO,
+    TLS_ST_CR_CERT,
+    TLS_ST_CR_COMP_CERT,
+    TLS_ST_CR_CERT_STATUS,
+    TLS_ST_CR_KEY_EXCH,
+    TLS_ST_CR_CERT_REQ,
+    TLS_ST_CR_SRVR_DONE,
+    TLS_ST_CR_SESSION_TICKET,
+    TLS_ST_CR_CHANGE,
+    TLS_ST_CR_FINISHED,
+    TLS_ST_CW_CLNT_HELLO,
+    TLS_ST_CW_CERT,
+    TLS_ST_CW_COMP_CERT,
+    TLS_ST_CW_KEY_EXCH,
+    TLS_ST_CW_CERT_VRFY,
+    TLS_ST_CW_CHANGE,
+    TLS_ST_CW_NEXT_PROTO,
+    TLS_ST_CW_FINISHED,
+    TLS_ST_SW_HELLO_REQ,
+    TLS_ST_SR_CLNT_HELLO,
+    DTLS_ST_SW_HELLO_VERIFY_REQUEST,
+    TLS_ST_SW_SRVR_HELLO,
+    TLS_ST_SW_CERT,
+    TLS_ST_SW_COMP_CERT,
+    TLS_ST_SW_KEY_EXCH,
+    TLS_ST_SW_CERT_REQ,
+    TLS_ST_SW_SRVR_DONE,
+    TLS_ST_SR_CERT,
+    TLS_ST_SR_COMP_CERT,
+    TLS_ST_SR_KEY_EXCH,
+    TLS_ST_SR_CERT_VRFY,
+    TLS_ST_SR_NEXT_PROTO,
+    TLS_ST_SR_CHANGE,
+    TLS_ST_SR_FINISHED,
+    TLS_ST_SW_SESSION_TICKET,
+    TLS_ST_SW_CERT_STATUS,
+    TLS_ST_SW_CHANGE,
+    TLS_ST_SW_FINISHED,
+    TLS_ST_SW_ENCRYPTED_EXTENSIONS,
+    TLS_ST_CR_ENCRYPTED_EXTENSIONS,
+    TLS_ST_CR_CERT_VRFY,
+    TLS_ST_SW_CERT_VRFY,
+    TLS_ST_CR_HELLO_REQ,
+    TLS_ST_SW_KEY_UPDATE,
+    TLS_ST_CW_KEY_UPDATE,
+    TLS_ST_SR_KEY_UPDATE,
+    TLS_ST_CR_KEY_UPDATE,
+    TLS_ST_EARLY_DATA,
+    TLS_ST_PENDING_EARLY_DATA_END,
+    TLS_ST_CW_END_OF_EARLY_DATA,
+    TLS_ST_SR_END_OF_EARLY_DATA
+} OSSL_HANDSHAKE_STATE;
+
+/*
+ * Most of the following state values are no longer used and are defined to be
+ * the closest equivalent value in the current state machine code. Not all
+ * defines have an equivalent and are set to a dummy value (-1). SSL_ST_CONNECT
+ * and SSL_ST_ACCEPT are still in use in the definition of SSL_CB_ACCEPT_LOOP,
+ * SSL_CB_ACCEPT_EXIT, SSL_CB_CONNECT_LOOP and SSL_CB_CONNECT_EXIT.
+ */
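The SSL_CB_* masks defined just below are typically consumed from an info
callback; a minimal logging sketch, with the registration call shown in the
trailing comment:

    #include <stdio.h>
    #include <openssl/ssl.h>

    /* Log handshake progress and alerts using the SSL_CB_* masks. */
    static void info_cb(const SSL *ssl, int where, int ret)
    {
        if (where & SSL_CB_HANDSHAKE_START)
            fprintf(stderr, "handshake started\n");
        if (where & SSL_CB_HANDSHAKE_DONE)
            fprintf(stderr, "handshake done: %s\n",
                    SSL_state_string_long(ssl));
        if (where & SSL_CB_ALERT)
            fprintf(stderr, "%s alert: %s\n",
                    (where & SSL_CB_READ) ? "read" : "write",
                    SSL_alert_desc_string_long(ret));
    }
    /* Registration: SSL_set_info_callback(ssl, info_cb); */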
+
+# define SSL_ST_CONNECT                  0x1000
+# define SSL_ST_ACCEPT                   0x2000
+
+# define SSL_ST_MASK                     0x0FFF
+
+# define SSL_CB_LOOP                     0x01
+# define SSL_CB_EXIT                     0x02
+# define SSL_CB_READ                     0x04
+# define SSL_CB_WRITE                    0x08
+# define SSL_CB_ALERT                    0x4000 /* used in callback */
+# define SSL_CB_READ_ALERT               (SSL_CB_ALERT|SSL_CB_READ)
+# define SSL_CB_WRITE_ALERT              (SSL_CB_ALERT|SSL_CB_WRITE)
+# define SSL_CB_ACCEPT_LOOP              (SSL_ST_ACCEPT|SSL_CB_LOOP)
+# define SSL_CB_ACCEPT_EXIT              (SSL_ST_ACCEPT|SSL_CB_EXIT)
+# define SSL_CB_CONNECT_LOOP             (SSL_ST_CONNECT|SSL_CB_LOOP)
+# define SSL_CB_CONNECT_EXIT             (SSL_ST_CONNECT|SSL_CB_EXIT)
+# define SSL_CB_HANDSHAKE_START          0x10
+# define SSL_CB_HANDSHAKE_DONE           0x20
+
+/* Is the SSL_connection established? */
+# define SSL_in_connect_init(a)          (SSL_in_init(a) && !SSL_is_server(a))
+# define SSL_in_accept_init(a)           (SSL_in_init(a) && SSL_is_server(a))
+int SSL_in_init(const SSL *s);
+int SSL_in_before(const SSL *s);
+int SSL_is_init_finished(const SSL *s);
+
+/*
+ * The following 3 states are kept in ssl->rlayer.rstate when reads fail, you
+ * should not need these
+ */
+# define SSL_ST_READ_HEADER 0xF0
+# define SSL_ST_READ_BODY   0xF1
+# define SSL_ST_READ_DONE   0xF2
+
+/*-
+ * Obtain latest Finished message
+ *   -- that we sent (SSL_get_finished)
+ *   -- that we expected from peer (SSL_get_peer_finished).
+ * Returns length (0 == no Finished so far), copies up to 'count' bytes.
+ */
+size_t SSL_get_finished(const SSL *s, void *buf, size_t count);
+size_t SSL_get_peer_finished(const SSL *s, void *buf, size_t count);
+
+/*
+ * use either SSL_VERIFY_NONE or SSL_VERIFY_PEER, the last 3 options are
+ * 'ored' with SSL_VERIFY_PEER if they are desired
+ */
+# define SSL_VERIFY_NONE                 0x00
+# define SSL_VERIFY_PEER                 0x01
+# define SSL_VERIFY_FAIL_IF_NO_PEER_CERT 0x02
+# define SSL_VERIFY_CLIENT_ONCE          0x04
+# define SSL_VERIFY_POST_HANDSHAKE       0x08
+
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+# define OpenSSL_add_ssl_algorithms()    SSL_library_init()
+# define SSLeay_add_ssl_algorithms()     SSL_library_init()
+# endif
+
+/* More backward compatibility */
+# define SSL_get_cipher(s) \
+                SSL_CIPHER_get_name(SSL_get_current_cipher(s))
+# define SSL_get_cipher_bits(s,np) \
+                SSL_CIPHER_get_bits(SSL_get_current_cipher(s),np)
+# define SSL_get_cipher_version(s) \
+                SSL_CIPHER_get_version(SSL_get_current_cipher(s))
+# define SSL_get_cipher_name(s) \
+                SSL_CIPHER_get_name(SSL_get_current_cipher(s))
+# define SSL_get_time(a)        SSL_SESSION_get_time(a)
+# define SSL_set_time(a,b)      SSL_SESSION_set_time((a),(b))
+# define SSL_get_timeout(a)     SSL_SESSION_get_timeout(a)
+# define SSL_set_timeout(a,b)   SSL_SESSION_set_timeout((a),(b))
+
+# define d2i_SSL_SESSION_bio(bp,s_id) ASN1_d2i_bio_of(SSL_SESSION,SSL_SESSION_new,d2i_SSL_SESSION,bp,s_id)
+# define i2d_SSL_SESSION_bio(bp,s_id) ASN1_i2d_bio_of(SSL_SESSION,i2d_SSL_SESSION,bp,s_id)
+
+DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
+# define SSL_AD_REASON_OFFSET            1000 /* offset to get SSL_R_... value
+                                               * from SSL_AD_...
*/ +/* These alert types are for SSLv3 and TLSv1 */ +# define SSL_AD_CLOSE_NOTIFY SSL3_AD_CLOSE_NOTIFY +/* fatal */ +# define SSL_AD_UNEXPECTED_MESSAGE SSL3_AD_UNEXPECTED_MESSAGE +/* fatal */ +# define SSL_AD_BAD_RECORD_MAC SSL3_AD_BAD_RECORD_MAC +# define SSL_AD_DECRYPTION_FAILED TLS1_AD_DECRYPTION_FAILED +# define SSL_AD_RECORD_OVERFLOW TLS1_AD_RECORD_OVERFLOW +/* fatal */ +# define SSL_AD_DECOMPRESSION_FAILURE SSL3_AD_DECOMPRESSION_FAILURE +/* fatal */ +# define SSL_AD_HANDSHAKE_FAILURE SSL3_AD_HANDSHAKE_FAILURE +/* Not for TLS */ +# define SSL_AD_NO_CERTIFICATE SSL3_AD_NO_CERTIFICATE +# define SSL_AD_BAD_CERTIFICATE SSL3_AD_BAD_CERTIFICATE +# define SSL_AD_UNSUPPORTED_CERTIFICATE SSL3_AD_UNSUPPORTED_CERTIFICATE +# define SSL_AD_CERTIFICATE_REVOKED SSL3_AD_CERTIFICATE_REVOKED +# define SSL_AD_CERTIFICATE_EXPIRED SSL3_AD_CERTIFICATE_EXPIRED +# define SSL_AD_CERTIFICATE_UNKNOWN SSL3_AD_CERTIFICATE_UNKNOWN +/* fatal */ +# define SSL_AD_ILLEGAL_PARAMETER SSL3_AD_ILLEGAL_PARAMETER +/* fatal */ +# define SSL_AD_UNKNOWN_CA TLS1_AD_UNKNOWN_CA +/* fatal */ +# define SSL_AD_ACCESS_DENIED TLS1_AD_ACCESS_DENIED +/* fatal */ +# define SSL_AD_DECODE_ERROR TLS1_AD_DECODE_ERROR +# define SSL_AD_DECRYPT_ERROR TLS1_AD_DECRYPT_ERROR +/* fatal */ +# define SSL_AD_EXPORT_RESTRICTION TLS1_AD_EXPORT_RESTRICTION +/* fatal */ +# define SSL_AD_PROTOCOL_VERSION TLS1_AD_PROTOCOL_VERSION +/* fatal */ +# define SSL_AD_INSUFFICIENT_SECURITY TLS1_AD_INSUFFICIENT_SECURITY +/* fatal */ +# define SSL_AD_INTERNAL_ERROR TLS1_AD_INTERNAL_ERROR +# define SSL_AD_USER_CANCELLED TLS1_AD_USER_CANCELLED +# define SSL_AD_NO_RENEGOTIATION TLS1_AD_NO_RENEGOTIATION +# define SSL_AD_MISSING_EXTENSION TLS13_AD_MISSING_EXTENSION +# define SSL_AD_CERTIFICATE_REQUIRED TLS13_AD_CERTIFICATE_REQUIRED +# define SSL_AD_UNSUPPORTED_EXTENSION TLS1_AD_UNSUPPORTED_EXTENSION +# define SSL_AD_CERTIFICATE_UNOBTAINABLE TLS1_AD_CERTIFICATE_UNOBTAINABLE +# define SSL_AD_UNRECOGNIZED_NAME TLS1_AD_UNRECOGNIZED_NAME +# define SSL_AD_BAD_CERTIFICATE_STATUS_RESPONSE TLS1_AD_BAD_CERTIFICATE_STATUS_RESPONSE +# define SSL_AD_BAD_CERTIFICATE_HASH_VALUE TLS1_AD_BAD_CERTIFICATE_HASH_VALUE +/* fatal */ +# define SSL_AD_UNKNOWN_PSK_IDENTITY TLS1_AD_UNKNOWN_PSK_IDENTITY +/* fatal */ +# define SSL_AD_INAPPROPRIATE_FALLBACK TLS1_AD_INAPPROPRIATE_FALLBACK +# define SSL_AD_NO_APPLICATION_PROTOCOL TLS1_AD_NO_APPLICATION_PROTOCOL +# define SSL_ERROR_NONE 0 +# define SSL_ERROR_SSL 1 +# define SSL_ERROR_WANT_READ 2 +# define SSL_ERROR_WANT_WRITE 3 +# define SSL_ERROR_WANT_X509_LOOKUP 4 +# define SSL_ERROR_SYSCALL 5/* look at error stack/return + * value/errno */ +# define SSL_ERROR_ZERO_RETURN 6 +# define SSL_ERROR_WANT_CONNECT 7 +# define SSL_ERROR_WANT_ACCEPT 8 +# define SSL_ERROR_WANT_ASYNC 9 +# define SSL_ERROR_WANT_ASYNC_JOB 10 +# define SSL_ERROR_WANT_CLIENT_HELLO_CB 11 +# define SSL_ERROR_WANT_RETRY_VERIFY 12 + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTRL_SET_TMP_DH 3 +# define SSL_CTRL_SET_TMP_ECDH 4 +# define SSL_CTRL_SET_TMP_DH_CB 6 +# endif + +# define SSL_CTRL_GET_CLIENT_CERT_REQUEST 9 +# define SSL_CTRL_GET_NUM_RENEGOTIATIONS 10 +# define SSL_CTRL_CLEAR_NUM_RENEGOTIATIONS 11 +# define SSL_CTRL_GET_TOTAL_RENEGOTIATIONS 12 +# define SSL_CTRL_GET_FLAGS 13 +# define SSL_CTRL_EXTRA_CHAIN_CERT 14 +# define SSL_CTRL_SET_MSG_CALLBACK 15 +# define SSL_CTRL_SET_MSG_CALLBACK_ARG 16 +/* only applies to datagram connections */ +# define SSL_CTRL_SET_MTU 17 +/* Stats */ +# define SSL_CTRL_SESS_NUMBER 20 +# define SSL_CTRL_SESS_CONNECT 21 +# define 
SSL_CTRL_SESS_CONNECT_GOOD 22 +# define SSL_CTRL_SESS_CONNECT_RENEGOTIATE 23 +# define SSL_CTRL_SESS_ACCEPT 24 +# define SSL_CTRL_SESS_ACCEPT_GOOD 25 +# define SSL_CTRL_SESS_ACCEPT_RENEGOTIATE 26 +# define SSL_CTRL_SESS_HIT 27 +# define SSL_CTRL_SESS_CB_HIT 28 +# define SSL_CTRL_SESS_MISSES 29 +# define SSL_CTRL_SESS_TIMEOUTS 30 +# define SSL_CTRL_SESS_CACHE_FULL 31 +# define SSL_CTRL_MODE 33 +# define SSL_CTRL_GET_READ_AHEAD 40 +# define SSL_CTRL_SET_READ_AHEAD 41 +# define SSL_CTRL_SET_SESS_CACHE_SIZE 42 +# define SSL_CTRL_GET_SESS_CACHE_SIZE 43 +# define SSL_CTRL_SET_SESS_CACHE_MODE 44 +# define SSL_CTRL_GET_SESS_CACHE_MODE 45 +# define SSL_CTRL_GET_MAX_CERT_LIST 50 +# define SSL_CTRL_SET_MAX_CERT_LIST 51 +# define SSL_CTRL_SET_MAX_SEND_FRAGMENT 52 +/* see tls1.h for macros based on these */ +# define SSL_CTRL_SET_TLSEXT_SERVERNAME_CB 53 +# define SSL_CTRL_SET_TLSEXT_SERVERNAME_ARG 54 +# define SSL_CTRL_SET_TLSEXT_HOSTNAME 55 +# define SSL_CTRL_SET_TLSEXT_DEBUG_CB 56 +# define SSL_CTRL_SET_TLSEXT_DEBUG_ARG 57 +# define SSL_CTRL_GET_TLSEXT_TICKET_KEYS 58 +# define SSL_CTRL_SET_TLSEXT_TICKET_KEYS 59 +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT 60 */ +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB 61 */ +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB_ARG 62 */ +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB 63 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB_ARG 64 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_TYPE 65 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_EXTS 66 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_EXTS 67 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_IDS 68 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_IDS 69 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_OCSP_RESP 70 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_OCSP_RESP 71 +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB 72 +# endif +# define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME_CB 75 +# define SSL_CTRL_SET_SRP_VERIFY_PARAM_CB 76 +# define SSL_CTRL_SET_SRP_GIVE_CLIENT_PWD_CB 77 +# define SSL_CTRL_SET_SRP_ARG 78 +# define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME 79 +# define SSL_CTRL_SET_TLS_EXT_SRP_STRENGTH 80 +# define SSL_CTRL_SET_TLS_EXT_SRP_PASSWORD 81 +# define DTLS_CTRL_GET_TIMEOUT 73 +# define DTLS_CTRL_HANDLE_TIMEOUT 74 +# define SSL_CTRL_GET_RI_SUPPORT 76 +# define SSL_CTRL_CLEAR_MODE 78 +# define SSL_CTRL_SET_NOT_RESUMABLE_SESS_CB 79 +# define SSL_CTRL_GET_EXTRA_CHAIN_CERTS 82 +# define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS 83 +# define SSL_CTRL_CHAIN 88 +# define SSL_CTRL_CHAIN_CERT 89 +# define SSL_CTRL_GET_GROUPS 90 +# define SSL_CTRL_SET_GROUPS 91 +# define SSL_CTRL_SET_GROUPS_LIST 92 +# define SSL_CTRL_GET_SHARED_GROUP 93 +# define SSL_CTRL_SET_SIGALGS 97 +# define SSL_CTRL_SET_SIGALGS_LIST 98 +# define SSL_CTRL_CERT_FLAGS 99 +# define SSL_CTRL_CLEAR_CERT_FLAGS 100 +# define SSL_CTRL_SET_CLIENT_SIGALGS 101 +# define SSL_CTRL_SET_CLIENT_SIGALGS_LIST 102 +# define SSL_CTRL_GET_CLIENT_CERT_TYPES 103 +# define SSL_CTRL_SET_CLIENT_CERT_TYPES 104 +# define SSL_CTRL_BUILD_CERT_CHAIN 105 +# define SSL_CTRL_SET_VERIFY_CERT_STORE 106 +# define SSL_CTRL_SET_CHAIN_CERT_STORE 107 +# define SSL_CTRL_GET_PEER_SIGNATURE_NID 108 +# define SSL_CTRL_GET_PEER_TMP_KEY 109 +# define SSL_CTRL_GET_RAW_CIPHERLIST 110 +# define SSL_CTRL_GET_EC_POINT_FORMATS 111 +# define SSL_CTRL_GET_CHAIN_CERTS 115 +# define SSL_CTRL_SELECT_CURRENT_CERT 116 +# define SSL_CTRL_SET_CURRENT_CERT 117 +# define SSL_CTRL_SET_DH_AUTO 118 +# define DTLS_CTRL_SET_LINK_MTU 120 +# define DTLS_CTRL_GET_LINK_MIN_MTU 121 +# define SSL_CTRL_GET_EXTMS_SUPPORT 122 +# define 
SSL_CTRL_SET_MIN_PROTO_VERSION 123 +# define SSL_CTRL_SET_MAX_PROTO_VERSION 124 +# define SSL_CTRL_SET_SPLIT_SEND_FRAGMENT 125 +# define SSL_CTRL_SET_MAX_PIPELINES 126 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_TYPE 127 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_CB 128 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_CB_ARG 129 +# define SSL_CTRL_GET_MIN_PROTO_VERSION 130 +# define SSL_CTRL_GET_MAX_PROTO_VERSION 131 +# define SSL_CTRL_GET_SIGNATURE_NID 132 +# define SSL_CTRL_GET_TMP_KEY 133 +# define SSL_CTRL_GET_NEGOTIATED_GROUP 134 +# define SSL_CTRL_GET_IANA_GROUPS 135 +# define SSL_CTRL_SET_RETRY_VERIFY 136 +# define SSL_CTRL_GET_VERIFY_CERT_STORE 137 +# define SSL_CTRL_GET_CHAIN_CERT_STORE 138 +# define SSL_CERT_SET_FIRST 1 +# define SSL_CERT_SET_NEXT 2 +# define SSL_CERT_SET_SERVER 3 +# define DTLSv1_get_timeout(ssl, arg) \ + SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)(arg)) +# define DTLSv1_handle_timeout(ssl) \ + SSL_ctrl(ssl,DTLS_CTRL_HANDLE_TIMEOUT,0, NULL) +# define SSL_num_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_GET_NUM_RENEGOTIATIONS,0,NULL) +# define SSL_clear_num_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_CLEAR_NUM_RENEGOTIATIONS,0,NULL) +# define SSL_total_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_GET_TOTAL_RENEGOTIATIONS,0,NULL) +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTX_set_tmp_dh(ctx,dh) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_TMP_DH,0,(char *)(dh)) +# endif +# define SSL_CTX_set_dh_auto(ctx, onoff) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_DH_AUTO,onoff,NULL) +# define SSL_set_dh_auto(s, onoff) \ + SSL_ctrl(s,SSL_CTRL_SET_DH_AUTO,onoff,NULL) +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_set_tmp_dh(ssl,dh) \ + SSL_ctrl(ssl,SSL_CTRL_SET_TMP_DH,0,(char *)(dh)) +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTX_set_tmp_ecdh(ctx,ecdh) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_TMP_ECDH,0,(char *)(ecdh)) +# define SSL_set_tmp_ecdh(ssl,ecdh) \ + SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)(ecdh)) +# endif +# define SSL_CTX_add_extra_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)(x509)) +# define SSL_CTX_get_extra_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,0,px509) +# define SSL_CTX_get_extra_chain_certs_only(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,1,px509) +# define SSL_CTX_clear_extra_chain_certs(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL) +# define SSL_CTX_set0_chain(ctx,sk) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)(sk)) +# define SSL_CTX_set1_chain(ctx,sk) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,1,(char *)(sk)) +# define SSL_CTX_add0_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,0,(char *)(x509)) +# define SSL_CTX_add1_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,1,(char *)(x509)) +# define SSL_CTX_get0_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERTS,0,px509) +# define SSL_CTX_clear_chain_certs(ctx) \ + SSL_CTX_set0_chain(ctx,NULL) +# define SSL_CTX_build_cert_chain(ctx, flags) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL) +# define SSL_CTX_select_current_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)(x509)) +# define SSL_CTX_set_current_cert(ctx, op) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURRENT_CERT, op, NULL) +# define SSL_CTX_set0_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_CTX_set1_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)(st)) +# define 
SSL_CTX_get0_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_CTX_set0_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_CTX_set1_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)(st)) +# define SSL_CTX_get0_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_set0_chain(s,sk) \ + SSL_ctrl(s,SSL_CTRL_CHAIN,0,(char *)(sk)) +# define SSL_set1_chain(s,sk) \ + SSL_ctrl(s,SSL_CTRL_CHAIN,1,(char *)(sk)) +# define SSL_add0_chain_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_CHAIN_CERT,0,(char *)(x509)) +# define SSL_add1_chain_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_CHAIN_CERT,1,(char *)(x509)) +# define SSL_get0_chain_certs(s,px509) \ + SSL_ctrl(s,SSL_CTRL_GET_CHAIN_CERTS,0,px509) +# define SSL_clear_chain_certs(s) \ + SSL_set0_chain(s,NULL) +# define SSL_build_cert_chain(s, flags) \ + SSL_ctrl(s,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL) +# define SSL_select_current_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)(x509)) +# define SSL_set_current_cert(s,op) \ + SSL_ctrl(s,SSL_CTRL_SET_CURRENT_CERT, op, NULL) +# define SSL_set0_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_set1_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)(st)) +#define SSL_get0_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_GET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_set0_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_set1_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)(st)) +#define SSL_get0_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_GET_CHAIN_CERT_STORE,0,(char *)(st)) + +# define SSL_get1_groups(s, glist) \ + SSL_ctrl(s,SSL_CTRL_GET_GROUPS,0,(int*)(glist)) +# define SSL_get0_iana_groups(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_IANA_GROUPS,0,(uint16_t **)(plst)) +# define SSL_CTX_set1_groups(ctx, glist, glistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS,glistlen,(int *)(glist)) +# define SSL_CTX_set1_groups_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(s)) +# define SSL_set1_groups(s, glist, glistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_GROUPS,glistlen,(char *)(glist)) +# define SSL_set1_groups_list(s, str) \ + SSL_ctrl(s,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(str)) +# define SSL_get_shared_group(s, n) \ + SSL_ctrl(s,SSL_CTRL_GET_SHARED_GROUP,n,NULL) +# define SSL_get_negotiated_group(s) \ + SSL_ctrl(s,SSL_CTRL_GET_NEGOTIATED_GROUP,0,NULL) +# define SSL_CTX_set1_sigalgs(ctx, slist, slistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS,slistlen,(int *)(slist)) +# define SSL_CTX_set1_sigalgs_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)(s)) +# define SSL_set1_sigalgs(s, slist, slistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_SIGALGS,slistlen,(int *)(slist)) +# define SSL_set1_sigalgs_list(s, str) \ + SSL_ctrl(s,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)(str)) +# define SSL_CTX_set1_client_sigalgs(ctx, slist, slistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)(slist)) +# define SSL_CTX_set1_client_sigalgs_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)(s)) +# define SSL_set1_client_sigalgs(s, slist, slistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)(slist)) +# define SSL_set1_client_sigalgs_list(s, str) \ + 
SSL_ctrl(s,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)(str))
+# define SSL_get0_certificate_types(s, clist) \
+        SSL_ctrl(s, SSL_CTRL_GET_CLIENT_CERT_TYPES, 0, (char *)(clist))
+# define SSL_CTX_set1_client_certificate_types(ctx, clist, clistlen) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen, \
+                     (char *)(clist))
+# define SSL_set1_client_certificate_types(s, clist, clistlen) \
+        SSL_ctrl(s,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)(clist))
+# define SSL_get_signature_nid(s, pn) \
+        SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NID,0,pn)
+# define SSL_get_peer_signature_nid(s, pn) \
+        SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NID,0,pn)
+# define SSL_get_peer_tmp_key(s, pk) \
+        SSL_ctrl(s,SSL_CTRL_GET_PEER_TMP_KEY,0,pk)
+# define SSL_get_tmp_key(s, pk) \
+        SSL_ctrl(s,SSL_CTRL_GET_TMP_KEY,0,pk)
+# define SSL_get0_raw_cipherlist(s, plst) \
+        SSL_ctrl(s,SSL_CTRL_GET_RAW_CIPHERLIST,0,plst)
+# define SSL_get0_ec_point_formats(s, plst) \
+        SSL_ctrl(s,SSL_CTRL_GET_EC_POINT_FORMATS,0,plst)
+# define SSL_CTX_set_min_proto_version(ctx, version) \
+        SSL_CTX_ctrl(ctx, SSL_CTRL_SET_MIN_PROTO_VERSION, version, NULL)
+# define SSL_CTX_set_max_proto_version(ctx, version) \
+        SSL_CTX_ctrl(ctx, SSL_CTRL_SET_MAX_PROTO_VERSION, version, NULL)
+# define SSL_CTX_get_min_proto_version(ctx) \
+        SSL_CTX_ctrl(ctx, SSL_CTRL_GET_MIN_PROTO_VERSION, 0, NULL)
+# define SSL_CTX_get_max_proto_version(ctx) \
+        SSL_CTX_ctrl(ctx, SSL_CTRL_GET_MAX_PROTO_VERSION, 0, NULL)
+# define SSL_set_min_proto_version(s, version) \
+        SSL_ctrl(s, SSL_CTRL_SET_MIN_PROTO_VERSION, version, NULL)
+# define SSL_set_max_proto_version(s, version) \
+        SSL_ctrl(s, SSL_CTRL_SET_MAX_PROTO_VERSION, version, NULL)
+# define SSL_get_min_proto_version(s) \
+        SSL_ctrl(s, SSL_CTRL_GET_MIN_PROTO_VERSION, 0, NULL)
+# define SSL_get_max_proto_version(s) \
+        SSL_ctrl(s, SSL_CTRL_GET_MAX_PROTO_VERSION, 0, NULL)
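A quick sketch of the version-pinning macros above: restrict a context to
TLS 1.2 through TLS 1.3 (TLS1_2_VERSION and TLS1_3_VERSION come from tls1.h,
included earlier; the helper name is made up for illustration):

    #include <openssl/ssl.h>

    /* Pin a fresh context to TLS 1.2 .. TLS 1.3 only. */
    static SSL_CTX *make_tls12_plus_ctx(void)
    {
        SSL_CTX *ctx = SSL_CTX_new(TLS_method());

        if (ctx == NULL)
            return NULL;
        if (!SSL_CTX_set_min_proto_version(ctx, TLS1_2_VERSION)
                || !SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION)) {
            SSL_CTX_free(ctx);
            return NULL;
        }
        return ctx;
    }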
+
+const char *SSL_get0_group_name(SSL *s);
+const char *SSL_group_to_name(SSL *s, int id);
+
+/* Backwards compatibility, original 1.1.0 names */
+# define SSL_CTRL_GET_SERVER_TMP_KEY \
+         SSL_CTRL_GET_PEER_TMP_KEY
+# define SSL_get_server_tmp_key(s, pk) \
+         SSL_get_peer_tmp_key(s, pk)
+
+int SSL_set0_tmp_dh_pkey(SSL *s, EVP_PKEY *dhpkey);
+int SSL_CTX_set0_tmp_dh_pkey(SSL_CTX *ctx, EVP_PKEY *dhpkey);
+
+/*
+ * The following symbol names are old and obsolete. They are kept
+ * for compatibility reasons only and should not be used anymore.
+ */
+# define SSL_CTRL_GET_CURVES            SSL_CTRL_GET_GROUPS
+# define SSL_CTRL_SET_CURVES            SSL_CTRL_SET_GROUPS
+# define SSL_CTRL_SET_CURVES_LIST       SSL_CTRL_SET_GROUPS_LIST
+# define SSL_CTRL_GET_SHARED_CURVE      SSL_CTRL_GET_SHARED_GROUP
+
+# define SSL_get1_curves                SSL_get1_groups
+# define SSL_CTX_set1_curves            SSL_CTX_set1_groups
+# define SSL_CTX_set1_curves_list       SSL_CTX_set1_groups_list
+# define SSL_set1_curves                SSL_set1_groups
+# define SSL_set1_curves_list           SSL_set1_groups_list
+# define SSL_get_shared_curve           SSL_get_shared_group
+
+
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+/* Provide some compatibility macros for removed functionality. */
+# define SSL_CTX_need_tmp_RSA(ctx)              0
+# define SSL_CTX_set_tmp_rsa(ctx,rsa)           1
+# define SSL_need_tmp_RSA(ssl)                  0
+# define SSL_set_tmp_rsa(ssl,rsa)               1
+# define SSL_CTX_set_ecdh_auto(dummy, onoff)    ((onoff) != 0)
+# define SSL_set_ecdh_auto(dummy, onoff)        ((onoff) != 0)
+/*
+ * We "pretend" to call the callback to avoid warnings about unused static
+ * functions.
+ */
+# define SSL_CTX_set_tmp_rsa_callback(ctx, cb)  while(0) (cb)(NULL, 0, 0)
+# define SSL_set_tmp_rsa_callback(ssl, cb)      while(0) (cb)(NULL, 0, 0)
+# endif
+__owur const BIO_METHOD *BIO_f_ssl(void);
+__owur BIO *BIO_new_ssl(SSL_CTX *ctx, int client);
+__owur BIO *BIO_new_ssl_connect(SSL_CTX *ctx);
+__owur BIO *BIO_new_buffer_ssl_connect(SSL_CTX *ctx);
+__owur int BIO_ssl_copy_session_id(BIO *to, BIO *from);
+void BIO_ssl_shutdown(BIO *ssl_bio);
+
+__owur int SSL_CTX_set_cipher_list(SSL_CTX *, const char *str);
+__owur SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth);
+__owur SSL_CTX *SSL_CTX_new_ex(OSSL_LIB_CTX *libctx, const char *propq,
+                               const SSL_METHOD *meth);
+int SSL_CTX_up_ref(SSL_CTX *ctx);
+void SSL_CTX_free(SSL_CTX *);
+__owur long SSL_CTX_set_timeout(SSL_CTX *ctx, long t);
+__owur long SSL_CTX_get_timeout(const SSL_CTX *ctx);
+__owur X509_STORE *SSL_CTX_get_cert_store(const SSL_CTX *);
+void SSL_CTX_set_cert_store(SSL_CTX *, X509_STORE *);
+void SSL_CTX_set1_cert_store(SSL_CTX *, X509_STORE *);
+__owur int SSL_want(const SSL *s);
+__owur int SSL_clear(SSL *s);
+
+void SSL_CTX_flush_sessions(SSL_CTX *ctx, long tm);
+
+__owur const SSL_CIPHER *SSL_get_current_cipher(const SSL *s);
+__owur const SSL_CIPHER *SSL_get_pending_cipher(const SSL *s);
+__owur int SSL_CIPHER_get_bits(const SSL_CIPHER *c, int *alg_bits);
+__owur const char *SSL_CIPHER_get_version(const SSL_CIPHER *c);
+__owur const char *SSL_CIPHER_get_name(const SSL_CIPHER *c);
+__owur const char *SSL_CIPHER_standard_name(const SSL_CIPHER *c);
+__owur const char *OPENSSL_cipher_name(const char *rfc_name);
+__owur uint32_t SSL_CIPHER_get_id(const SSL_CIPHER *c);
+__owur uint16_t SSL_CIPHER_get_protocol_id(const SSL_CIPHER *c);
+__owur int SSL_CIPHER_get_kx_nid(const SSL_CIPHER *c);
+__owur int SSL_CIPHER_get_auth_nid(const SSL_CIPHER *c);
+__owur const EVP_MD *SSL_CIPHER_get_handshake_digest(const SSL_CIPHER *c);
+__owur int SSL_CIPHER_is_aead(const SSL_CIPHER *c);
+
+__owur int SSL_get_fd(const SSL *s);
+__owur int SSL_get_rfd(const SSL *s);
+__owur int SSL_get_wfd(const SSL *s);
+__owur const char *SSL_get_cipher_list(const SSL *s, int n);
+__owur char *SSL_get_shared_ciphers(const SSL *s, char *buf, int size);
+__owur int SSL_get_read_ahead(const SSL *s);
+__owur int SSL_pending(const SSL *s);
+__owur int SSL_has_pending(const SSL *s);
+# ifndef OPENSSL_NO_SOCK
+__owur int SSL_set_fd(SSL *s, int fd);
+__owur int SSL_set_rfd(SSL *s, int fd);
+__owur int SSL_set_wfd(SSL *s, int fd);
+# endif
+void SSL_set0_rbio(SSL *s, BIO *rbio);
+void SSL_set0_wbio(SSL *s, BIO *wbio);
+void SSL_set_bio(SSL *s, BIO *rbio, BIO *wbio);
+__owur BIO *SSL_get_rbio(const SSL *s);
+__owur BIO *SSL_get_wbio(const SSL *s);
+__owur int SSL_set_cipher_list(SSL *s, const char *str);
+__owur int SSL_CTX_set_ciphersuites(SSL_CTX *ctx, const char *str);
+__owur int SSL_set_ciphersuites(SSL *s, const char *str);
+void SSL_set_read_ahead(SSL *s, int yes);
+__owur int SSL_get_verify_mode(const SSL *s);
+__owur int SSL_get_verify_depth(const SSL *s);
+__owur SSL_verify_cb SSL_get_verify_callback(const SSL *s);
+void SSL_set_verify(SSL *s, int mode, SSL_verify_cb callback);
+void SSL_set_verify_depth(SSL *s, int depth);
+void SSL_set_cert_cb(SSL *s, int (*cb) (SSL *ssl, void *arg), void *arg);
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+OSSL_DEPRECATEDIN_3_0 __owur int SSL_use_RSAPrivateKey(SSL *ssl, RSA *rsa);
+OSSL_DEPRECATEDIN_3_0
+__owur int SSL_use_RSAPrivateKey_ASN1(SSL *ssl,
+                                      const unsigned char *d, long len);
+# endif
+__owur int SSL_use_PrivateKey(SSL
*ssl, EVP_PKEY *pkey); +__owur int SSL_use_PrivateKey_ASN1(int pk, SSL *ssl, const unsigned char *d, + long len); +__owur int SSL_use_certificate(SSL *ssl, X509 *x); +__owur int SSL_use_certificate_ASN1(SSL *ssl, const unsigned char *d, int len); +__owur int SSL_use_cert_and_key(SSL *ssl, X509 *x509, EVP_PKEY *privatekey, + STACK_OF(X509) *chain, int override); + + +/* serverinfo file format versions */ +# define SSL_SERVERINFOV1 1 +# define SSL_SERVERINFOV2 2 + +/* Set serverinfo data for the current active cert. */ +__owur int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo, + size_t serverinfo_length); +__owur int SSL_CTX_use_serverinfo_ex(SSL_CTX *ctx, unsigned int version, + const unsigned char *serverinfo, + size_t serverinfo_length); +__owur int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_use_RSAPrivateKey_file(SSL *ssl, const char *file, int type); +#endif + +__owur int SSL_use_PrivateKey_file(SSL *ssl, const char *file, int type); +__owur int SSL_use_certificate_file(SSL *ssl, const char *file, int type); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey_file(SSL_CTX *ctx, const char *file, + int type); +#endif +__owur int SSL_CTX_use_PrivateKey_file(SSL_CTX *ctx, const char *file, + int type); +__owur int SSL_CTX_use_certificate_file(SSL_CTX *ctx, const char *file, + int type); +/* PEM type */ +__owur int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file); +__owur int SSL_use_certificate_chain_file(SSL *ssl, const char *file); +__owur STACK_OF(X509_NAME) *SSL_load_client_CA_file(const char *file); +__owur STACK_OF(X509_NAME) +*SSL_load_client_CA_file_ex(const char *file, OSSL_LIB_CTX *libctx, + const char *propq); +__owur int SSL_add_file_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *file); +int SSL_add_dir_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *dir); +int SSL_add_store_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *uri); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_load_error_strings() \ + OPENSSL_init_ssl(OPENSSL_INIT_LOAD_SSL_STRINGS \ + | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL) +# endif + +__owur const char *SSL_state_string(const SSL *s); +__owur const char *SSL_rstate_string(const SSL *s); +__owur const char *SSL_state_string_long(const SSL *s); +__owur const char *SSL_rstate_string_long(const SSL *s); +__owur long SSL_SESSION_get_time(const SSL_SESSION *s); +__owur long SSL_SESSION_set_time(SSL_SESSION *s, long t); +__owur long SSL_SESSION_get_timeout(const SSL_SESSION *s); +__owur long SSL_SESSION_set_timeout(SSL_SESSION *s, long t); +__owur int SSL_SESSION_get_protocol_version(const SSL_SESSION *s); +__owur int SSL_SESSION_set_protocol_version(SSL_SESSION *s, int version); + +__owur const char *SSL_SESSION_get0_hostname(const SSL_SESSION *s); +__owur int SSL_SESSION_set1_hostname(SSL_SESSION *s, const char *hostname); +void SSL_SESSION_get0_alpn_selected(const SSL_SESSION *s, + const unsigned char **alpn, + size_t *len); +__owur int SSL_SESSION_set1_alpn_selected(SSL_SESSION *s, + const unsigned char *alpn, + size_t len); +__owur const SSL_CIPHER *SSL_SESSION_get0_cipher(const SSL_SESSION *s); +__owur int SSL_SESSION_set_cipher(SSL_SESSION *s, const SSL_CIPHER *cipher); +__owur int SSL_SESSION_has_ticket(const SSL_SESSION *s); +__owur unsigned long SSL_SESSION_get_ticket_lifetime_hint(const SSL_SESSION *s); +void 
SSL_SESSION_get0_ticket(const SSL_SESSION *s, const unsigned char **tick, + size_t *len); +__owur uint32_t SSL_SESSION_get_max_early_data(const SSL_SESSION *s); +__owur int SSL_SESSION_set_max_early_data(SSL_SESSION *s, + uint32_t max_early_data); +__owur int SSL_copy_session_id(SSL *to, const SSL *from); +__owur X509 *SSL_SESSION_get0_peer(SSL_SESSION *s); +__owur int SSL_SESSION_set1_id_context(SSL_SESSION *s, + const unsigned char *sid_ctx, + unsigned int sid_ctx_len); +__owur int SSL_SESSION_set1_id(SSL_SESSION *s, const unsigned char *sid, + unsigned int sid_len); +__owur int SSL_SESSION_is_resumable(const SSL_SESSION *s); + +__owur SSL_SESSION *SSL_SESSION_new(void); +__owur SSL_SESSION *SSL_SESSION_dup(const SSL_SESSION *src); +const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s, + unsigned int *len); +const unsigned char *SSL_SESSION_get0_id_context(const SSL_SESSION *s, + unsigned int *len); +__owur unsigned int SSL_SESSION_get_compress_id(const SSL_SESSION *s); +# ifndef OPENSSL_NO_STDIO +int SSL_SESSION_print_fp(FILE *fp, const SSL_SESSION *ses); +# endif +int SSL_SESSION_print(BIO *fp, const SSL_SESSION *ses); +int SSL_SESSION_print_keylog(BIO *bp, const SSL_SESSION *x); +int SSL_SESSION_up_ref(SSL_SESSION *ses); +void SSL_SESSION_free(SSL_SESSION *ses); +__owur int i2d_SSL_SESSION(const SSL_SESSION *in, unsigned char **pp); +__owur int SSL_set_session(SSL *to, SSL_SESSION *session); +int SSL_CTX_add_session(SSL_CTX *ctx, SSL_SESSION *session); +int SSL_CTX_remove_session(SSL_CTX *ctx, SSL_SESSION *session); +__owur int SSL_CTX_set_generate_session_id(SSL_CTX *ctx, GEN_SESSION_CB cb); +__owur int SSL_set_generate_session_id(SSL *s, GEN_SESSION_CB cb); +__owur int SSL_has_matching_session_id(const SSL *s, + const unsigned char *id, + unsigned int id_len); +SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, + long length); +SSL_SESSION *d2i_SSL_SESSION_ex(SSL_SESSION **a, const unsigned char **pp, + long length, OSSL_LIB_CTX *libctx, + const char *propq); + +# ifdef OPENSSL_X509_H +__owur X509 *SSL_get0_peer_certificate(const SSL *s); +__owur X509 *SSL_get1_peer_certificate(const SSL *s); +/* Deprecated in 3.0.0 */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_get_peer_certificate SSL_get1_peer_certificate +# endif +# endif + +__owur STACK_OF(X509) *SSL_get_peer_cert_chain(const SSL *s); + +__owur int SSL_CTX_get_verify_mode(const SSL_CTX *ctx); +__owur int SSL_CTX_get_verify_depth(const SSL_CTX *ctx); +__owur SSL_verify_cb SSL_CTX_get_verify_callback(const SSL_CTX *ctx); +void SSL_CTX_set_verify(SSL_CTX *ctx, int mode, SSL_verify_cb callback); +void SSL_CTX_set_verify_depth(SSL_CTX *ctx, int depth); +void SSL_CTX_set_cert_verify_callback(SSL_CTX *ctx, + int (*cb) (X509_STORE_CTX *, void *), + void *arg); +void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cb) (SSL *ssl, void *arg), + void *arg); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey(SSL_CTX *ctx, RSA *rsa); +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey_ASN1(SSL_CTX *ctx, const unsigned char *d, + long len); +# endif +__owur int SSL_CTX_use_PrivateKey(SSL_CTX *ctx, EVP_PKEY *pkey); +__owur int SSL_CTX_use_PrivateKey_ASN1(int pk, SSL_CTX *ctx, + const unsigned char *d, long len); +__owur int SSL_CTX_use_certificate(SSL_CTX *ctx, X509 *x); +__owur int SSL_CTX_use_certificate_ASN1(SSL_CTX *ctx, int len, + const unsigned char *d); +__owur int SSL_CTX_use_cert_and_key(SSL_CTX *ctx, X509 *x509, EVP_PKEY *privatekey, + STACK_OF(X509) 
*chain, int override); + +void SSL_CTX_set_default_passwd_cb(SSL_CTX *ctx, pem_password_cb *cb); +void SSL_CTX_set_default_passwd_cb_userdata(SSL_CTX *ctx, void *u); +pem_password_cb *SSL_CTX_get_default_passwd_cb(SSL_CTX *ctx); +void *SSL_CTX_get_default_passwd_cb_userdata(SSL_CTX *ctx); +void SSL_set_default_passwd_cb(SSL *s, pem_password_cb *cb); +void SSL_set_default_passwd_cb_userdata(SSL *s, void *u); +pem_password_cb *SSL_get_default_passwd_cb(SSL *s); +void *SSL_get_default_passwd_cb_userdata(SSL *s); + +__owur int SSL_CTX_check_private_key(const SSL_CTX *ctx); +__owur int SSL_check_private_key(const SSL *ctx); + +__owur int SSL_CTX_set_session_id_context(SSL_CTX *ctx, + const unsigned char *sid_ctx, + unsigned int sid_ctx_len); + +SSL *SSL_new(SSL_CTX *ctx); +int SSL_up_ref(SSL *s); +int SSL_is_dtls(const SSL *s); +int SSL_is_tls(const SSL *s); +int SSL_is_quic(const SSL *s); +__owur int SSL_set_session_id_context(SSL *ssl, const unsigned char *sid_ctx, + unsigned int sid_ctx_len); + +__owur int SSL_CTX_set_purpose(SSL_CTX *ctx, int purpose); +__owur int SSL_set_purpose(SSL *ssl, int purpose); +__owur int SSL_CTX_set_trust(SSL_CTX *ctx, int trust); +__owur int SSL_set_trust(SSL *ssl, int trust); + +__owur int SSL_set1_host(SSL *s, const char *hostname); +__owur int SSL_add1_host(SSL *s, const char *hostname); +__owur const char *SSL_get0_peername(SSL *s); +void SSL_set_hostflags(SSL *s, unsigned int flags); + +__owur int SSL_CTX_dane_enable(SSL_CTX *ctx); +__owur int SSL_CTX_dane_mtype_set(SSL_CTX *ctx, const EVP_MD *md, + uint8_t mtype, uint8_t ord); +__owur int SSL_dane_enable(SSL *s, const char *basedomain); +__owur int SSL_dane_tlsa_add(SSL *s, uint8_t usage, uint8_t selector, + uint8_t mtype, const unsigned char *data, size_t dlen); +__owur int SSL_get0_dane_authority(SSL *s, X509 **mcert, EVP_PKEY **mspki); +__owur int SSL_get0_dane_tlsa(SSL *s, uint8_t *usage, uint8_t *selector, + uint8_t *mtype, const unsigned char **data, + size_t *dlen); +/* + * Bridge opacity barrier between libcrypt and libssl, also needed to support + * offline testing in test/danetest.c + */ +SSL_DANE *SSL_get0_dane(SSL *ssl); +/* + * DANE flags + */ +unsigned long SSL_CTX_dane_set_flags(SSL_CTX *ctx, unsigned long flags); +unsigned long SSL_CTX_dane_clear_flags(SSL_CTX *ctx, unsigned long flags); +unsigned long SSL_dane_set_flags(SSL *ssl, unsigned long flags); +unsigned long SSL_dane_clear_flags(SSL *ssl, unsigned long flags); + +__owur int SSL_CTX_set1_param(SSL_CTX *ctx, X509_VERIFY_PARAM *vpm); +__owur int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm); + +__owur X509_VERIFY_PARAM *SSL_CTX_get0_param(SSL_CTX *ctx); +__owur X509_VERIFY_PARAM *SSL_get0_param(SSL *ssl); + +# ifndef OPENSSL_NO_SRP +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_username(SSL_CTX *ctx, char *name); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_password(SSL_CTX *ctx, char *password); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_strength(SSL_CTX *ctx, int strength); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_client_pwd_callback(SSL_CTX *ctx, + char *(*cb) (SSL *, void *)); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_verify_param_callback(SSL_CTX *ctx, + int (*cb) (SSL *, void *)); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_username_callback(SSL_CTX *ctx, + int (*cb) (SSL *, int *, void *)); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_cb_arg(SSL_CTX *ctx, void *arg); + +OSSL_DEPRECATEDIN_3_0 +int SSL_set_srp_server_param(SSL *s, const BIGNUM *N, const BIGNUM *g, + BIGNUM *sa, BIGNUM *v, char 
*info);
+OSSL_DEPRECATEDIN_3_0
+int SSL_set_srp_server_param_pw(SSL *s, const char *user, const char *pass,
+                                const char *grp);
+
+OSSL_DEPRECATEDIN_3_0 __owur BIGNUM *SSL_get_srp_g(SSL *s);
+OSSL_DEPRECATEDIN_3_0 __owur BIGNUM *SSL_get_srp_N(SSL *s);
+
+OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_username(SSL *s);
+OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_userinfo(SSL *s);
+# endif
+# endif
+
+/*
+ * ClientHello callback and helpers.
+ */
+
+# define SSL_CLIENT_HELLO_SUCCESS 1
+# define SSL_CLIENT_HELLO_ERROR   0
+# define SSL_CLIENT_HELLO_RETRY   (-1)
+
+typedef int (*SSL_client_hello_cb_fn) (SSL *s, int *al, void *arg);
+void SSL_CTX_set_client_hello_cb(SSL_CTX *c, SSL_client_hello_cb_fn cb,
+                                 void *arg);
+int SSL_client_hello_isv2(SSL *s);
+unsigned int SSL_client_hello_get0_legacy_version(SSL *s);
+size_t SSL_client_hello_get0_random(SSL *s, const unsigned char **out);
+size_t SSL_client_hello_get0_session_id(SSL *s, const unsigned char **out);
+size_t SSL_client_hello_get0_ciphers(SSL *s, const unsigned char **out);
+size_t SSL_client_hello_get0_compression_methods(SSL *s,
+                                                 const unsigned char **out);
+int SSL_client_hello_get1_extensions_present(SSL *s, int **out, size_t *outlen);
+int SSL_client_hello_get_extension_order(SSL *s, uint16_t *exts,
+                                         size_t *num_exts);
+int SSL_client_hello_get0_ext(SSL *s, unsigned int type,
+                              const unsigned char **out, size_t *outlen);
+
+void SSL_certs_clear(SSL *s);
+void SSL_free(SSL *ssl);
+# ifdef OSSL_ASYNC_FD
+/*
+ * Windows application developer has to include windows.h to use these.
+ */
+__owur int SSL_waiting_for_async(SSL *s);
+__owur int SSL_get_all_async_fds(SSL *s, OSSL_ASYNC_FD *fds, size_t *numfds);
+__owur int SSL_get_changed_async_fds(SSL *s, OSSL_ASYNC_FD *addfd,
+                                     size_t *numaddfds, OSSL_ASYNC_FD *delfd,
+                                     size_t *numdelfds);
+__owur int SSL_CTX_set_async_callback(SSL_CTX *ctx, SSL_async_callback_fn callback);
+__owur int SSL_CTX_set_async_callback_arg(SSL_CTX *ctx, void *arg);
+__owur int SSL_set_async_callback(SSL *s, SSL_async_callback_fn callback);
+__owur int SSL_set_async_callback_arg(SSL *s, void *arg);
+__owur int SSL_get_async_status(SSL *s, int *status);
+
+# endif
+__owur int SSL_accept(SSL *ssl);
+__owur int SSL_stateless(SSL *s);
+__owur int SSL_connect(SSL *ssl);
+__owur int SSL_read(SSL *ssl, void *buf, int num);
+__owur int SSL_read_ex(SSL *ssl, void *buf, size_t num, size_t *readbytes);
+
+# define SSL_READ_EARLY_DATA_ERROR   0
+# define SSL_READ_EARLY_DATA_SUCCESS 1
+# define SSL_READ_EARLY_DATA_FINISH  2
+
+__owur int SSL_read_early_data(SSL *s, void *buf, size_t num,
+                               size_t *readbytes);
+__owur int SSL_peek(SSL *ssl, void *buf, int num);
+__owur int SSL_peek_ex(SSL *ssl, void *buf, size_t num, size_t *readbytes);
+__owur ossl_ssize_t SSL_sendfile(SSL *s, int fd, off_t offset, size_t size,
+                                 int flags);
+__owur int SSL_write(SSL *ssl, const void *buf, int num);
+__owur int SSL_write_ex(SSL *s, const void *buf, size_t num, size_t *written);
+__owur int SSL_write_early_data(SSL *s, const void *buf, size_t num,
+                                size_t *written);
+long SSL_ctrl(SSL *ssl, int cmd, long larg, void *parg);
+long SSL_callback_ctrl(SSL *, int, void (*)(void));
+long SSL_CTX_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg);
+long SSL_CTX_callback_ctrl(SSL_CTX *, int, void (*)(void));
+
+# define SSL_EARLY_DATA_NOT_SENT    0
+# define SSL_EARLY_DATA_REJECTED    1
+# define SSL_EARLY_DATA_ACCEPTED    2
+
+__owur int SSL_get_early_data_status(const SSL *s);
+
+__owur int SSL_get_error(const SSL *s, int ret_code);
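The SSL_ERROR_* codes defined earlier are consumed through SSL_get_error();
a minimal sketch of a retrying read loop follows (a real application would
wait for transport readiness instead of retrying immediately):

    #include <openssl/ssl.h>

    /* Returns 1 on data, 0 on clean peer shutdown, -1 on fatal error. */
    static int read_some(SSL *ssl, void *buf, size_t len, size_t *readbytes)
    {
        for (;;) {
            if (SSL_read_ex(ssl, buf, len, readbytes) > 0)
                return 1;
            switch (SSL_get_error(ssl, 0)) {
            case SSL_ERROR_WANT_READ:
            case SSL_ERROR_WANT_WRITE:
                continue;           /* retry once the BIO is ready again */
            case SSL_ERROR_ZERO_RETURN:
                return 0;           /* peer sent close_notify */
            default:
                return -1;          /* fatal: consult the error stack */
            }
        }
    }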
+__owur const char *SSL_get_version(const SSL *s);
+__owur int SSL_get_handshake_rtt(const SSL *s, uint64_t *rtt);
+
+/* This sets the 'default' SSL version that SSL_new() will create */
+# ifndef OPENSSL_NO_DEPRECATED_3_0
+OSSL_DEPRECATEDIN_3_0
+__owur int SSL_CTX_set_ssl_version(SSL_CTX *ctx, const SSL_METHOD *meth);
+# endif
+
+# ifndef OPENSSL_NO_SSL3_METHOD
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_method(void); /* SSLv3 */
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_server_method(void);
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_client_method(void);
+# endif
+# endif
+
+#define SSLv23_method           TLS_method
+#define SSLv23_server_method    TLS_server_method
+#define SSLv23_client_method    TLS_client_method
+
+/* Negotiate highest available SSL/TLS version */
+__owur const SSL_METHOD *TLS_method(void);
+__owur const SSL_METHOD *TLS_server_method(void);
+__owur const SSL_METHOD *TLS_client_method(void);
+
+# ifndef OPENSSL_NO_TLS1_METHOD
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_method(void); /* TLSv1.0 */
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_server_method(void);
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_client_method(void);
+# endif
+# endif
+
+# ifndef OPENSSL_NO_TLS1_1_METHOD
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_1_method(void); /* TLSv1.1 */
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_1_server_method(void);
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_1_client_method(void);
+# endif
+# endif
+
+# ifndef OPENSSL_NO_TLS1_2_METHOD
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_method(void); /* TLSv1.2 */
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_server_method(void);
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_client_method(void);
+# endif
+# endif
+
+# ifndef OPENSSL_NO_DTLS1_METHOD
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_method(void); /* DTLSv1.0 */
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_server_method(void);
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_client_method(void);
+# endif
+# endif
+
+# ifndef OPENSSL_NO_DTLS1_2_METHOD
+/* DTLSv1.2 */
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_method(void);
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_server_method(void);
+OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_client_method(void);
+# endif
+# endif
+
+__owur const SSL_METHOD *DTLS_method(void); /* DTLS 1.0 and 1.2 */
+__owur const SSL_METHOD *DTLS_server_method(void); /* DTLS 1.0 and 1.2 */
+__owur const SSL_METHOD *DTLS_client_method(void); /* DTLS 1.0 and 1.2 */
+
+__owur size_t DTLS_get_data_mtu(const SSL *s);
+
+__owur STACK_OF(SSL_CIPHER) *SSL_get_ciphers(const SSL *s);
+__owur STACK_OF(SSL_CIPHER) *SSL_CTX_get_ciphers(const SSL_CTX *ctx);
+__owur STACK_OF(SSL_CIPHER) *SSL_get_client_ciphers(const SSL *s);
+__owur STACK_OF(SSL_CIPHER) *SSL_get1_supported_ciphers(SSL *s);
+
+__owur int SSL_do_handshake(SSL *s);
+int SSL_key_update(SSL *s, int updatetype);
+int SSL_get_key_update_type(const SSL *s);
+int SSL_renegotiate(SSL *s);
+int SSL_renegotiate_abbreviated(SSL *s);
+__owur int SSL_renegotiate_pending(const SSL *s);
+int SSL_new_session_ticket(SSL *s);
+int SSL_shutdown(SSL *s);
+__owur int SSL_verify_client_post_handshake(SSL
*s); +void SSL_CTX_set_post_handshake_auth(SSL_CTX *ctx, int val); +void SSL_set_post_handshake_auth(SSL *s, int val); + +__owur const SSL_METHOD *SSL_CTX_get_ssl_method(const SSL_CTX *ctx); +__owur const SSL_METHOD *SSL_get_ssl_method(const SSL *s); +__owur int SSL_set_ssl_method(SSL *s, const SSL_METHOD *method); +__owur const char *SSL_alert_type_string_long(int value); +__owur const char *SSL_alert_type_string(int value); +__owur const char *SSL_alert_desc_string_long(int value); +__owur const char *SSL_alert_desc_string(int value); + +void SSL_set0_CA_list(SSL *s, STACK_OF(X509_NAME) *name_list); +void SSL_CTX_set0_CA_list(SSL_CTX *ctx, STACK_OF(X509_NAME) *name_list); +__owur const STACK_OF(X509_NAME) *SSL_get0_CA_list(const SSL *s); +__owur const STACK_OF(X509_NAME) *SSL_CTX_get0_CA_list(const SSL_CTX *ctx); +__owur int SSL_add1_to_CA_list(SSL *ssl, const X509 *x); +__owur int SSL_CTX_add1_to_CA_list(SSL_CTX *ctx, const X509 *x); +__owur const STACK_OF(X509_NAME) *SSL_get0_peer_CA_list(const SSL *s); + +void SSL_set_client_CA_list(SSL *s, STACK_OF(X509_NAME) *name_list); +void SSL_CTX_set_client_CA_list(SSL_CTX *ctx, STACK_OF(X509_NAME) *name_list); +__owur STACK_OF(X509_NAME) *SSL_get_client_CA_list(const SSL *s); +__owur STACK_OF(X509_NAME) *SSL_CTX_get_client_CA_list(const SSL_CTX *s); +__owur int SSL_add_client_CA(SSL *ssl, X509 *x); +__owur int SSL_CTX_add_client_CA(SSL_CTX *ctx, X509 *x); + +void SSL_set_connect_state(SSL *s); +void SSL_set_accept_state(SSL *s); + +__owur long SSL_get_default_timeout(const SSL *s); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_library_init() OPENSSL_init_ssl(0, NULL) +# endif + +__owur char *SSL_CIPHER_description(const SSL_CIPHER *, char *buf, int size); +__owur STACK_OF(X509_NAME) *SSL_dup_CA_list(const STACK_OF(X509_NAME) *sk); + +__owur SSL *SSL_dup(SSL *ssl); + +__owur X509 *SSL_get_certificate(const SSL *ssl); +/* + * EVP_PKEY + */ +struct evp_pkey_st *SSL_get_privatekey(const SSL *ssl); + +__owur X509 *SSL_CTX_get0_certificate(const SSL_CTX *ctx); +__owur EVP_PKEY *SSL_CTX_get0_privatekey(const SSL_CTX *ctx); + +void SSL_CTX_set_quiet_shutdown(SSL_CTX *ctx, int mode); +__owur int SSL_CTX_get_quiet_shutdown(const SSL_CTX *ctx); +void SSL_set_quiet_shutdown(SSL *ssl, int mode); +__owur int SSL_get_quiet_shutdown(const SSL *ssl); +void SSL_set_shutdown(SSL *ssl, int mode); +__owur int SSL_get_shutdown(const SSL *ssl); +__owur int SSL_version(const SSL *ssl); +__owur int SSL_client_version(const SSL *s); +__owur int SSL_CTX_set_default_verify_paths(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_dir(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_file(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_store(SSL_CTX *ctx); +__owur int SSL_CTX_load_verify_file(SSL_CTX *ctx, const char *CAfile); +__owur int SSL_CTX_load_verify_dir(SSL_CTX *ctx, const char *CApath); +__owur int SSL_CTX_load_verify_store(SSL_CTX *ctx, const char *CAstore); +__owur int SSL_CTX_load_verify_locations(SSL_CTX *ctx, + const char *CAfile, + const char *CApath); +# define SSL_get0_session SSL_get_session/* just peek at pointer */ +__owur SSL_SESSION *SSL_get_session(const SSL *ssl); +__owur SSL_SESSION *SSL_get1_session(SSL *ssl); /* obtain a reference count */ +__owur SSL_CTX *SSL_get_SSL_CTX(const SSL *ssl); +SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX *ctx); +void SSL_set_info_callback(SSL *ssl, + void (*cb) (const SSL *ssl, int type, int val)); +void (*SSL_get_info_callback(const SSL *ssl)) (const SSL *ssl, int type, + int val); +__owur 
OSSL_HANDSHAKE_STATE SSL_get_state(const SSL *ssl); + +void SSL_set_verify_result(SSL *ssl, long v); +__owur long SSL_get_verify_result(const SSL *ssl); +__owur STACK_OF(X509) *SSL_get0_verified_chain(const SSL *s); + +__owur size_t SSL_get_client_random(const SSL *ssl, unsigned char *out, + size_t outlen); +__owur size_t SSL_get_server_random(const SSL *ssl, unsigned char *out, + size_t outlen); +__owur size_t SSL_SESSION_get_master_key(const SSL_SESSION *sess, + unsigned char *out, size_t outlen); +__owur int SSL_SESSION_set1_master_key(SSL_SESSION *sess, + const unsigned char *in, size_t len); +uint8_t SSL_SESSION_get_max_fragment_length(const SSL_SESSION *sess); + +#define SSL_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL, l, p, newf, dupf, freef) +__owur int SSL_set_ex_data(SSL *ssl, int idx, void *data); +void *SSL_get_ex_data(const SSL *ssl, int idx); +#define SSL_SESSION_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL_SESSION, l, p, newf, dupf, freef) +__owur int SSL_SESSION_set_ex_data(SSL_SESSION *ss, int idx, void *data); +void *SSL_SESSION_get_ex_data(const SSL_SESSION *ss, int idx); +#define SSL_CTX_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL_CTX, l, p, newf, dupf, freef) +__owur int SSL_CTX_set_ex_data(SSL_CTX *ssl, int idx, void *data); +void *SSL_CTX_get_ex_data(const SSL_CTX *ssl, int idx); + +__owur int SSL_get_ex_data_X509_STORE_CTX_idx(void); + +# define SSL_CTX_sess_set_cache_size(ctx,t) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SESS_CACHE_SIZE,t,NULL) +# define SSL_CTX_sess_get_cache_size(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_SESS_CACHE_SIZE,0,NULL) +# define SSL_CTX_set_session_cache_mode(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SESS_CACHE_MODE,m,NULL) +# define SSL_CTX_get_session_cache_mode(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_SESS_CACHE_MODE,0,NULL) + +# define SSL_CTX_get_default_read_ahead(ctx) SSL_CTX_get_read_ahead(ctx) +# define SSL_CTX_set_default_read_ahead(ctx,m) SSL_CTX_set_read_ahead(ctx,m) +# define SSL_CTX_get_read_ahead(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_READ_AHEAD,0,NULL) +# define SSL_CTX_set_read_ahead(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_READ_AHEAD,m,NULL) +# define SSL_CTX_get_max_cert_list(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_MAX_CERT_LIST,0,NULL) +# define SSL_CTX_set_max_cert_list(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_CERT_LIST,m,NULL) +# define SSL_get_max_cert_list(ssl) \ + SSL_ctrl(ssl,SSL_CTRL_GET_MAX_CERT_LIST,0,NULL) +# define SSL_set_max_cert_list(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_CERT_LIST,m,NULL) + +# define SSL_CTX_set_max_send_fragment(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_SEND_FRAGMENT,m,NULL) +# define SSL_set_max_send_fragment(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_SEND_FRAGMENT,m,NULL) +# define SSL_CTX_set_split_send_fragment(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SPLIT_SEND_FRAGMENT,m,NULL) +# define SSL_set_split_send_fragment(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_SPLIT_SEND_FRAGMENT,m,NULL) +# define SSL_CTX_set_max_pipelines(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_PIPELINES,m,NULL) +# define SSL_set_max_pipelines(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_PIPELINES,m,NULL) +# define SSL_set_retry_verify(ssl) \ + (SSL_ctrl(ssl,SSL_CTRL_SET_RETRY_VERIFY,0,NULL) > 0) + +void SSL_CTX_set_default_read_buffer_len(SSL_CTX *ctx, size_t len); +void SSL_set_default_read_buffer_len(SSL *s, size_t len); + +# ifndef OPENSSL_NO_DH +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* NB: 
the |keylength| is only applicable when is_export is true */ +OSSL_DEPRECATEDIN_3_0 +void SSL_CTX_set_tmp_dh_callback(SSL_CTX *ctx, + DH *(*dh) (SSL *ssl, int is_export, + int keylength)); +OSSL_DEPRECATEDIN_3_0 +void SSL_set_tmp_dh_callback(SSL *ssl, + DH *(*dh) (SSL *ssl, int is_export, + int keylength)); +# endif +# endif + +__owur const COMP_METHOD *SSL_get_current_compression(const SSL *s); +__owur const COMP_METHOD *SSL_get_current_expansion(const SSL *s); +__owur const char *SSL_COMP_get_name(const COMP_METHOD *comp); +__owur const char *SSL_COMP_get0_name(const SSL_COMP *comp); +__owur int SSL_COMP_get_id(const SSL_COMP *comp); +STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void); +__owur STACK_OF(SSL_COMP) *SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP) + *meths); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_COMP_free_compression_methods() while(0) continue +# endif +__owur int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm); + +const SSL_CIPHER *SSL_CIPHER_find(SSL *ssl, const unsigned char *ptr); +int SSL_CIPHER_get_cipher_nid(const SSL_CIPHER *c); +int SSL_CIPHER_get_digest_nid(const SSL_CIPHER *c); +int SSL_bytes_to_cipher_list(SSL *s, const unsigned char *bytes, size_t len, + int isv2format, STACK_OF(SSL_CIPHER) **sk, + STACK_OF(SSL_CIPHER) **scsvs); + +/* TLS extensions functions */ +__owur int SSL_set_session_ticket_ext(SSL *s, void *ext_data, int ext_len); + +__owur int SSL_set_session_ticket_ext_cb(SSL *s, + tls_session_ticket_ext_cb_fn cb, + void *arg); + +/* Pre-shared secret session resumption functions */ +__owur int SSL_set_session_secret_cb(SSL *s, + tls_session_secret_cb_fn session_secret_cb, + void *arg); + +void SSL_CTX_set_not_resumable_session_callback(SSL_CTX *ctx, + int (*cb) (SSL *ssl, + int + is_forward_secure)); + +void SSL_set_not_resumable_session_callback(SSL *ssl, + int (*cb) (SSL *ssl, + int is_forward_secure)); + +void SSL_CTX_set_record_padding_callback(SSL_CTX *ctx, + size_t (*cb) (SSL *ssl, int type, + size_t len, void *arg)); +void SSL_CTX_set_record_padding_callback_arg(SSL_CTX *ctx, void *arg); +void *SSL_CTX_get_record_padding_callback_arg(const SSL_CTX *ctx); +int SSL_CTX_set_block_padding(SSL_CTX *ctx, size_t block_size); + +int SSL_set_record_padding_callback(SSL *ssl, + size_t (*cb) (SSL *ssl, int type, + size_t len, void *arg)); +void SSL_set_record_padding_callback_arg(SSL *ssl, void *arg); +void *SSL_get_record_padding_callback_arg(const SSL *ssl); +int SSL_set_block_padding(SSL *ssl, size_t block_size); + +int SSL_set_num_tickets(SSL *s, size_t num_tickets); +size_t SSL_get_num_tickets(const SSL *s); +int SSL_CTX_set_num_tickets(SSL_CTX *ctx, size_t num_tickets); +size_t SSL_CTX_get_num_tickets(const SSL_CTX *ctx); + +/* QUIC support */ +int SSL_handle_events(SSL *s); +__owur int SSL_get_event_timeout(SSL *s, struct timeval *tv, int *is_infinite); +__owur int SSL_get_rpoll_descriptor(SSL *s, BIO_POLL_DESCRIPTOR *desc); +__owur int SSL_get_wpoll_descriptor(SSL *s, BIO_POLL_DESCRIPTOR *desc); +__owur int SSL_net_read_desired(SSL *s); +__owur int SSL_net_write_desired(SSL *s); +__owur int SSL_set_blocking_mode(SSL *s, int blocking); +__owur int SSL_get_blocking_mode(SSL *s); +__owur int SSL_set1_initial_peer_addr(SSL *s, const BIO_ADDR *peer_addr); +__owur SSL *SSL_get0_connection(SSL *s); +__owur int SSL_is_connection(SSL *s); + +#define SSL_STREAM_TYPE_NONE 0 +#define SSL_STREAM_TYPE_READ (1U << 0) +#define SSL_STREAM_TYPE_WRITE (1U << 1) +#define SSL_STREAM_TYPE_BIDI (SSL_STREAM_TYPE_READ | 
SSL_STREAM_TYPE_WRITE)
+__owur int SSL_get_stream_type(SSL *s);
+
+__owur uint64_t SSL_get_stream_id(SSL *s);
+__owur int SSL_is_stream_local(SSL *s);
+
+#define SSL_DEFAULT_STREAM_MODE_NONE 0
+#define SSL_DEFAULT_STREAM_MODE_AUTO_BIDI 1
+#define SSL_DEFAULT_STREAM_MODE_AUTO_UNI 2
+__owur int SSL_set_default_stream_mode(SSL *s, uint32_t mode);
+
+#define SSL_STREAM_FLAG_UNI (1U << 0)
+#define SSL_STREAM_FLAG_NO_BLOCK (1U << 1)
+#define SSL_STREAM_FLAG_ADVANCE (1U << 2)
+__owur SSL *SSL_new_stream(SSL *s, uint64_t flags);
+
+#define SSL_INCOMING_STREAM_POLICY_AUTO 0
+#define SSL_INCOMING_STREAM_POLICY_ACCEPT 1
+#define SSL_INCOMING_STREAM_POLICY_REJECT 2
+__owur int SSL_set_incoming_stream_policy(SSL *s, int policy, uint64_t aec);
+
+#define SSL_ACCEPT_STREAM_NO_BLOCK (1U << 0)
+__owur SSL *SSL_accept_stream(SSL *s, uint64_t flags);
+__owur size_t SSL_get_accept_stream_queue_len(SSL *s);
+
+# ifndef OPENSSL_NO_QUIC
+__owur int SSL_inject_net_dgram(SSL *s, const unsigned char *buf,
+                                size_t buf_len,
+                                const BIO_ADDR *peer,
+                                const BIO_ADDR *local);
+# endif
+
+typedef struct ssl_shutdown_ex_args_st {
+    uint64_t quic_error_code;
+    const char *quic_reason;
+} SSL_SHUTDOWN_EX_ARGS;
+
+#define SSL_SHUTDOWN_FLAG_RAPID (1U << 0)
+#define SSL_SHUTDOWN_FLAG_NO_STREAM_FLUSH (1U << 1)
+#define SSL_SHUTDOWN_FLAG_NO_BLOCK (1U << 2)
+#define SSL_SHUTDOWN_FLAG_WAIT_PEER (1U << 3)
+
+__owur int SSL_shutdown_ex(SSL *ssl, uint64_t flags,
+                           const SSL_SHUTDOWN_EX_ARGS *args,
+                           size_t args_len);
+
+__owur int SSL_stream_conclude(SSL *ssl, uint64_t flags);
+
+typedef struct ssl_stream_reset_args_st {
+    uint64_t quic_error_code;
+} SSL_STREAM_RESET_ARGS;
+
+__owur int SSL_stream_reset(SSL *ssl,
+                            const SSL_STREAM_RESET_ARGS *args,
+                            size_t args_len);
+
+#define SSL_STREAM_STATE_NONE 0
+#define SSL_STREAM_STATE_OK 1
+#define SSL_STREAM_STATE_WRONG_DIR 2
+#define SSL_STREAM_STATE_FINISHED 3
+#define SSL_STREAM_STATE_RESET_LOCAL 4
+#define SSL_STREAM_STATE_RESET_REMOTE 5
+#define SSL_STREAM_STATE_CONN_CLOSED 6
+__owur int SSL_get_stream_read_state(SSL *ssl);
+__owur int SSL_get_stream_write_state(SSL *ssl);
+
+__owur int SSL_get_stream_read_error_code(SSL *ssl, uint64_t *app_error_code);
+__owur int SSL_get_stream_write_error_code(SSL *ssl, uint64_t *app_error_code);
+
+#define SSL_CONN_CLOSE_FLAG_LOCAL (1U << 0)
+#define SSL_CONN_CLOSE_FLAG_TRANSPORT (1U << 1)
+
+typedef struct ssl_conn_close_info_st {
+    uint64_t error_code, frame_type;
+    const char *reason;
+    size_t reason_len;
+    uint32_t flags;
+} SSL_CONN_CLOSE_INFO;
+
+__owur int SSL_get_conn_close_info(SSL *ssl,
+                                   SSL_CONN_CLOSE_INFO *info,
+                                   size_t info_len);
+
+# ifndef OPENSSL_NO_DEPRECATED_1_1_0
+# define SSL_cache_hit(s) SSL_session_reused(s)
+# endif
+
+__owur int SSL_session_reused(const SSL *s);
+__owur int SSL_is_server(const SSL *s);
+
+__owur SSL_CONF_CTX *SSL_CONF_CTX_new(void);
+int SSL_CONF_CTX_finish(SSL_CONF_CTX *cctx);
+void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx);
+unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags);
+__owur unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx,
+                                             unsigned int flags);
+__owur int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *pre);
+
+void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl);
+void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx);
+
+__owur int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value);
+__owur int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv);
+__owur int SSL_CONF_cmd_value_type(SSL_CONF_CTX
*cctx, const char *cmd); + +void SSL_add_ssl_module(void); +int SSL_config(SSL *s, const char *name); +int SSL_CTX_config(SSL_CTX *ctx, const char *name); + +# ifndef OPENSSL_NO_SSL_TRACE +void SSL_trace(int write_p, int version, int content_type, + const void *buf, size_t len, SSL *ssl, void *arg); +# endif + +# ifndef OPENSSL_NO_SOCK +int DTLSv1_listen(SSL *s, BIO_ADDR *client); +# endif + +# ifndef OPENSSL_NO_CT + +/* + * A callback for verifying that the received SCTs are sufficient. + * Expected to return 1 if they are sufficient, otherwise 0. + * May return a negative integer if an error occurs. + * A connection should be aborted if the SCTs are deemed insufficient. + */ +typedef int (*ssl_ct_validation_cb)(const CT_POLICY_EVAL_CTX *ctx, + const STACK_OF(SCT) *scts, void *arg); + +/* + * Sets a |callback| that is invoked upon receipt of ServerHelloDone to validate + * the received SCTs. + * If the callback returns a non-positive result, the connection is terminated. + * Call this function before beginning a handshake. + * If a NULL |callback| is provided, SCT validation is disabled. + * |arg| is arbitrary userdata that will be passed to the callback whenever it + * is invoked. Ownership of |arg| remains with the caller. + * + * NOTE: A side-effect of setting a CT callback is that an OCSP stapled response + * will be requested. + */ +int SSL_set_ct_validation_callback(SSL *s, ssl_ct_validation_cb callback, + void *arg); +int SSL_CTX_set_ct_validation_callback(SSL_CTX *ctx, + ssl_ct_validation_cb callback, + void *arg); +#define SSL_disable_ct(s) \ + ((void) SSL_set_validation_callback((s), NULL, NULL)) +#define SSL_CTX_disable_ct(ctx) \ + ((void) SSL_CTX_set_validation_callback((ctx), NULL, NULL)) + +/* + * The validation type enumerates the available behaviours of the built-in SSL + * CT validation callback selected via SSL_enable_ct() and SSL_CTX_enable_ct(). + * The underlying callback is a static function in libssl. + */ +enum { + SSL_CT_VALIDATION_PERMISSIVE = 0, + SSL_CT_VALIDATION_STRICT +}; + +/* + * Enable CT by setting up a callback that implements one of the built-in + * validation variants. The SSL_CT_VALIDATION_PERMISSIVE variant always + * continues the handshake, the application can make appropriate decisions at + * handshake completion. The SSL_CT_VALIDATION_STRICT variant requires at + * least one valid SCT, or else handshake termination will be requested. The + * handshake may continue anyway if SSL_VERIFY_NONE is in effect. + */ +int SSL_enable_ct(SSL *s, int validation_mode); +int SSL_CTX_enable_ct(SSL_CTX *ctx, int validation_mode); + +/* + * Report whether a non-NULL callback is enabled. + */ +int SSL_ct_is_enabled(const SSL *s); +int SSL_CTX_ct_is_enabled(const SSL_CTX *ctx); + +/* Gets the SCTs received from a connection */ +const STACK_OF(SCT) *SSL_get0_peer_scts(SSL *s); + +/* + * Loads the CT log list from the default location. + * If a CTLOG_STORE has previously been set using SSL_CTX_set_ctlog_store, + * the log information loaded from this file will be appended to the + * CTLOG_STORE. + * Returns 1 on success, 0 otherwise. + */ +int SSL_CTX_set_default_ctlog_list_file(SSL_CTX *ctx); + +/* + * Loads the CT log list from the specified file path. + * If a CTLOG_STORE has previously been set using SSL_CTX_set_ctlog_store, + * the log information loaded from this file will be appended to the + * CTLOG_STORE. + * Returns 1 on success, 0 otherwise. 
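+ *
+ * Editor's illustrative sketch, not part of the generated header: given an
+ * existing SSL_CTX |ctx|, strict built-in CT validation backed by a log
+ * list file (placeholder path) could be wired up like this:
+ *
+ *     if (SSL_CTX_enable_ct(ctx, SSL_CT_VALIDATION_STRICT) != 1
+ *             || SSL_CTX_set_ctlog_list_file(ctx, "ct_log_list.cnf") != 1)
+ *         ...treat as fatal: strict CT needs a usable log store...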
+ */ +int SSL_CTX_set_ctlog_list_file(SSL_CTX *ctx, const char *path); + +/* + * Sets the CT log list used by all SSL connections created from this SSL_CTX. + * Ownership of the CTLOG_STORE is transferred to the SSL_CTX. + */ +void SSL_CTX_set0_ctlog_store(SSL_CTX *ctx, CTLOG_STORE *logs); + +/* + * Gets the CT log list used by all SSL connections created from this SSL_CTX. + * This will be NULL unless one of the following functions has been called: + * - SSL_CTX_set_default_ctlog_list_file + * - SSL_CTX_set_ctlog_list_file + * - SSL_CTX_set_ctlog_store + */ +const CTLOG_STORE *SSL_CTX_get0_ctlog_store(const SSL_CTX *ctx); + +# endif /* OPENSSL_NO_CT */ + +/* What the "other" parameter contains in security callback */ +/* Mask for type */ +# define SSL_SECOP_OTHER_TYPE 0xffff0000 +# define SSL_SECOP_OTHER_NONE 0 +# define SSL_SECOP_OTHER_CIPHER (1 << 16) +# define SSL_SECOP_OTHER_CURVE (2 << 16) +# define SSL_SECOP_OTHER_DH (3 << 16) +# define SSL_SECOP_OTHER_PKEY (4 << 16) +# define SSL_SECOP_OTHER_SIGALG (5 << 16) +# define SSL_SECOP_OTHER_CERT (6 << 16) + +/* Indicated operation refers to peer key or certificate */ +# define SSL_SECOP_PEER 0x1000 + +/* Values for "op" parameter in security callback */ + +/* Called to filter ciphers */ +/* Ciphers client supports */ +# define SSL_SECOP_CIPHER_SUPPORTED (1 | SSL_SECOP_OTHER_CIPHER) +/* Cipher shared by client/server */ +# define SSL_SECOP_CIPHER_SHARED (2 | SSL_SECOP_OTHER_CIPHER) +/* Sanity check of cipher server selects */ +# define SSL_SECOP_CIPHER_CHECK (3 | SSL_SECOP_OTHER_CIPHER) +/* Curves supported by client */ +# define SSL_SECOP_CURVE_SUPPORTED (4 | SSL_SECOP_OTHER_CURVE) +/* Curves shared by client/server */ +# define SSL_SECOP_CURVE_SHARED (5 | SSL_SECOP_OTHER_CURVE) +/* Sanity check of curve server selects */ +# define SSL_SECOP_CURVE_CHECK (6 | SSL_SECOP_OTHER_CURVE) +/* Temporary DH key */ +# define SSL_SECOP_TMP_DH (7 | SSL_SECOP_OTHER_PKEY) +/* SSL/TLS version */ +# define SSL_SECOP_VERSION (9 | SSL_SECOP_OTHER_NONE) +/* Session tickets */ +# define SSL_SECOP_TICKET (10 | SSL_SECOP_OTHER_NONE) +/* Supported signature algorithms sent to peer */ +# define SSL_SECOP_SIGALG_SUPPORTED (11 | SSL_SECOP_OTHER_SIGALG) +/* Shared signature algorithm */ +# define SSL_SECOP_SIGALG_SHARED (12 | SSL_SECOP_OTHER_SIGALG) +/* Sanity check signature algorithm allowed */ +# define SSL_SECOP_SIGALG_CHECK (13 | SSL_SECOP_OTHER_SIGALG) +/* Used to get mask of supported public key signature algorithms */ +# define SSL_SECOP_SIGALG_MASK (14 | SSL_SECOP_OTHER_SIGALG) +/* Use to see if compression is allowed */ +# define SSL_SECOP_COMPRESSION (15 | SSL_SECOP_OTHER_NONE) +/* EE key in certificate */ +# define SSL_SECOP_EE_KEY (16 | SSL_SECOP_OTHER_CERT) +/* CA key in certificate */ +# define SSL_SECOP_CA_KEY (17 | SSL_SECOP_OTHER_CERT) +/* CA digest algorithm in certificate */ +# define SSL_SECOP_CA_MD (18 | SSL_SECOP_OTHER_CERT) +/* Peer EE key in certificate */ +# define SSL_SECOP_PEER_EE_KEY (SSL_SECOP_EE_KEY | SSL_SECOP_PEER) +/* Peer CA key in certificate */ +# define SSL_SECOP_PEER_CA_KEY (SSL_SECOP_CA_KEY | SSL_SECOP_PEER) +/* Peer CA digest algorithm in certificate */ +# define SSL_SECOP_PEER_CA_MD (SSL_SECOP_CA_MD | SSL_SECOP_PEER) + +void SSL_set_security_level(SSL *s, int level); +__owur int SSL_get_security_level(const SSL *s); +void SSL_set_security_callback(SSL *s, + int (*cb) (const SSL *s, const SSL_CTX *ctx, + int op, int bits, int nid, + void *other, void *ex)); +int (*SSL_get_security_callback(const SSL *s)) (const SSL *s, + 
const SSL_CTX *ctx, int op, + int bits, int nid, void *other, + void *ex); +void SSL_set0_security_ex_data(SSL *s, void *ex); +__owur void *SSL_get0_security_ex_data(const SSL *s); + +void SSL_CTX_set_security_level(SSL_CTX *ctx, int level); +__owur int SSL_CTX_get_security_level(const SSL_CTX *ctx); +void SSL_CTX_set_security_callback(SSL_CTX *ctx, + int (*cb) (const SSL *s, const SSL_CTX *ctx, + int op, int bits, int nid, + void *other, void *ex)); +int (*SSL_CTX_get_security_callback(const SSL_CTX *ctx)) (const SSL *s, + const SSL_CTX *ctx, + int op, int bits, + int nid, + void *other, + void *ex); +void SSL_CTX_set0_security_ex_data(SSL_CTX *ctx, void *ex); +__owur void *SSL_CTX_get0_security_ex_data(const SSL_CTX *ctx); + +/* OPENSSL_INIT flag 0x010000 reserved for internal use */ +# define OPENSSL_INIT_NO_LOAD_SSL_STRINGS 0x00100000L +# define OPENSSL_INIT_LOAD_SSL_STRINGS 0x00200000L + +# define OPENSSL_INIT_SSL_DEFAULT \ + (OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS) + +int OPENSSL_init_ssl(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings); + +# ifndef OPENSSL_NO_UNIT_TEST +__owur const struct openssl_ssl_test_functions *SSL_test_functions(void); +# endif + +__owur int SSL_free_buffers(SSL *ssl); +__owur int SSL_alloc_buffers(SSL *ssl); + +/* Status codes passed to the decrypt session ticket callback. Some of these + * are for internal use only and are never passed to the callback. */ +typedef int SSL_TICKET_STATUS; + +/* Support for ticket appdata */ +/* fatal error, malloc failure */ +# define SSL_TICKET_FATAL_ERR_MALLOC 0 +/* fatal error, either from parsing or decrypting the ticket */ +# define SSL_TICKET_FATAL_ERR_OTHER 1 +/* No ticket present */ +# define SSL_TICKET_NONE 2 +/* Empty ticket present */ +# define SSL_TICKET_EMPTY 3 +/* the ticket couldn't be decrypted */ +# define SSL_TICKET_NO_DECRYPT 4 +/* a ticket was successfully decrypted */ +# define SSL_TICKET_SUCCESS 5 +/* same as above but the ticket needs to be renewed */ +# define SSL_TICKET_SUCCESS_RENEW 6 + +/* Return codes for the decrypt session ticket callback */ +typedef int SSL_TICKET_RETURN; + +/* An error occurred */ +#define SSL_TICKET_RETURN_ABORT 0 +/* Do not use the ticket, do not send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_IGNORE 1 +/* Do not use the ticket, send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_IGNORE_RENEW 2 +/* Use the ticket, do not send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_USE 3 +/* Use the ticket, send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_USE_RENEW 4 + +typedef int (*SSL_CTX_generate_session_ticket_fn)(SSL *s, void *arg); +typedef SSL_TICKET_RETURN (*SSL_CTX_decrypt_session_ticket_fn)(SSL *s, SSL_SESSION *ss, + const unsigned char *keyname, + size_t keyname_length, + SSL_TICKET_STATUS status, + void *arg); +int SSL_CTX_set_session_ticket_cb(SSL_CTX *ctx, + SSL_CTX_generate_session_ticket_fn gen_cb, + SSL_CTX_decrypt_session_ticket_fn dec_cb, + void *arg); +int SSL_SESSION_set1_ticket_appdata(SSL_SESSION *ss, const void *data, size_t len); +int SSL_SESSION_get0_ticket_appdata(SSL_SESSION *ss, void **data, size_t *len); + +typedef unsigned int (*DTLS_timer_cb)(SSL *s, unsigned int timer_us); + +void DTLS_set_timer_cb(SSL *s, DTLS_timer_cb cb); + + +typedef int (*SSL_allow_early_data_cb_fn)(SSL *s, void *arg); +void SSL_CTX_set_allow_early_data_cb(SSL_CTX *ctx, + SSL_allow_early_data_cb_fn cb, + void *arg); +void SSL_set_allow_early_data_cb(SSL *s, + SSL_allow_early_data_cb_fn 
cb, + void *arg); + +/* store the default cipher strings inside the library */ +const char *OSSL_default_cipher_list(void); +const char *OSSL_default_ciphersuites(void); + +/* RFC8879 Certificate compression APIs */ + +int SSL_CTX_compress_certs(SSL_CTX *ctx, int alg); +int SSL_compress_certs(SSL *ssl, int alg); + +int SSL_CTX_set1_cert_comp_preference(SSL_CTX *ctx, int *algs, size_t len); +int SSL_set1_cert_comp_preference(SSL *ssl, int *algs, size_t len); + +int SSL_CTX_set1_compressed_cert(SSL_CTX *ctx, int algorithm, unsigned char *comp_data, + size_t comp_length, size_t orig_length); +int SSL_set1_compressed_cert(SSL *ssl, int algorithm, unsigned char *comp_data, + size_t comp_length, size_t orig_length); +size_t SSL_CTX_get1_compressed_cert(SSL_CTX *ctx, int alg, unsigned char **data, size_t *orig_len); +size_t SSL_get1_compressed_cert(SSL *ssl, int alg, unsigned char **data, size_t *orig_len); + +__owur int SSL_add_expected_rpk(SSL *s, EVP_PKEY *rpk); +__owur EVP_PKEY *SSL_get0_peer_rpk(const SSL *s); +__owur EVP_PKEY *SSL_SESSION_get0_peer_rpk(SSL_SESSION *s); +__owur int SSL_get_negotiated_client_cert_type(const SSL *s); +__owur int SSL_get_negotiated_server_cert_type(const SSL *s); + +__owur int SSL_set1_client_cert_type(SSL *s, const unsigned char *val, size_t len); +__owur int SSL_set1_server_cert_type(SSL *s, const unsigned char *val, size_t len); +__owur int SSL_CTX_set1_client_cert_type(SSL_CTX *ctx, const unsigned char *val, size_t len); +__owur int SSL_CTX_set1_server_cert_type(SSL_CTX *ctx, const unsigned char *val, size_t len); +__owur int SSL_get0_client_cert_type(const SSL *s, unsigned char **t, size_t *len); +__owur int SSL_get0_server_cert_type(const SSL *s, unsigned char **t, size_t *len); +__owur int SSL_CTX_get0_client_cert_type(const SSL_CTX *ctx, unsigned char **t, size_t *len); +__owur int SSL_CTX_get0_server_cert_type(const SSL_CTX *s, unsigned char **t, size_t *len); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/ui.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ui.h new file mode 100644 index 00000000000..e64ec3b37fb --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/ui.h @@ -0,0 +1,407 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ui.h.in + * + * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_UI_H +# define OPENSSL_UI_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_UI_H +# endif + +# include + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# endif +# include +# include +# include +# include + +/* For compatibility reasons, the macro OPENSSL_NO_UI is currently retained */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifdef OPENSSL_NO_UI_CONSOLE +# define OPENSSL_NO_UI +# endif +# endif + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * All the following functions return -1 or NULL on error and in some cases + * (UI_process()) -2 if interrupted or in some other way cancelled. When + * everything is fine, they return 0, a positive value or a non-NULL pointer, + * all depending on their purpose. + */ + +/* Creators and destructor. 
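+ *
+ * Editor's illustrative sketch, not part of the generated header: the usual
+ * lifecycle is create, add strings (next section), process, free; error
+ * checks omitted:
+ *
+ *     UI *ui = UI_new();
+ *     ...UI_add_input_string() etc., see below...
+ *     UI_process(ui);
+ *     UI_free(ui);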
*/ +UI *UI_new(void); +UI *UI_new_method(const UI_METHOD *method); +void UI_free(UI *ui); + +/*- + The following functions are used to add strings to be printed and prompt + strings to prompt for data. The names are UI_{add,dup}__string + and UI_{add,dup}_input_boolean. + + UI_{add,dup}__string have the following meanings: + add add a text or prompt string. The pointers given to these + functions are used verbatim, no copying is done. + dup make a copy of the text or prompt string, then add the copy + to the collection of strings in the user interface. + + The function is a name for the functionality that the given + string shall be used for. It can be one of: + input use the string as data prompt. + verify use the string as verification prompt. This + is used to verify a previous input. + info use the string for informational output. + error use the string for error output. + Honestly, there's currently no difference between info and error for the + moment. + + UI_{add,dup}_input_boolean have the same semantics for "add" and "dup", + and are typically used when one wants to prompt for a yes/no response. + + All of the functions in this group take a UI and a prompt string. + The string input and verify addition functions also take a flag argument, + a buffer for the result to end up with, a minimum input size and a maximum + input size (the result buffer MUST be large enough to be able to contain + the maximum number of characters). Additionally, the verify addition + functions takes another buffer to compare the result against. + The boolean input functions take an action description string (which should + be safe to ignore if the expected user action is obvious, for example with + a dialog box with an OK button and a Cancel button), a string of acceptable + characters to mean OK and to mean Cancel. The two last strings are checked + to make sure they don't have common characters. Additionally, the same + flag argument as for the string input is taken, as well as a result buffer. + The result buffer is required to be at least one byte long. Depending on + the answer, the first character from the OK or the Cancel character strings + will be stored in the first byte of the result buffer. No NUL will be + added, so the result is *not* a string. + + On success, the all return an index of the added information. That index + is useful when retrieving results with UI_get0_result(). */ +int UI_add_input_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize); +int UI_dup_input_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize); +int UI_add_verify_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize, + const char *test_buf); +int UI_dup_verify_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize, + const char *test_buf); +int UI_add_input_boolean(UI *ui, const char *prompt, const char *action_desc, + const char *ok_chars, const char *cancel_chars, + int flags, char *result_buf); +int UI_dup_input_boolean(UI *ui, const char *prompt, const char *action_desc, + const char *ok_chars, const char *cancel_chars, + int flags, char *result_buf); +int UI_add_info_string(UI *ui, const char *text); +int UI_dup_info_string(UI *ui, const char *text); +int UI_add_error_string(UI *ui, const char *text); +int UI_dup_error_string(UI *ui, const char *text); + +/* These are the possible flags. They can be or'ed together. 
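+ *
+ * Editor's illustrative sketch, not part of the generated header: following
+ * the rules above, an echoing prompt into a caller-owned buffer might look
+ * like this, using a flag defined below (index checking omitted):
+ *
+ *     char buf[64];
+ *     int idx = UI_add_input_string(ui, "Login name:", UI_INPUT_FLAG_ECHO,
+ *                                   buf, 1, sizeof(buf) - 1);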
*/ +/* Use to have echoing of input */ +# define UI_INPUT_FLAG_ECHO 0x01 +/* + * Use a default password. Where that password is found is completely up to + * the application, it might for example be in the user data set with + * UI_add_user_data(). It is not recommended to have more than one input in + * each UI being marked with this flag, or the application might get + * confused. + */ +# define UI_INPUT_FLAG_DEFAULT_PWD 0x02 + +/*- + * The user of these routines may want to define flags of their own. The core + * UI won't look at those, but will pass them on to the method routines. They + * must use higher bits so they don't get confused with the UI bits above. + * UI_INPUT_FLAG_USER_BASE tells which is the lowest bit to use. A good + * example of use is this: + * + * #define MY_UI_FLAG1 (0x01 << UI_INPUT_FLAG_USER_BASE) + * +*/ +# define UI_INPUT_FLAG_USER_BASE 16 + +/*- + * The following function helps construct a prompt. + * phrase_desc is a textual short description of the phrase to enter, + * for example "pass phrase", and + * object_name is the name of the object + * (which might be a card name or a file name) or NULL. + * The returned string shall always be allocated on the heap with + * OPENSSL_malloc(), and need to be free'd with OPENSSL_free(). + * + * If the ui_method doesn't contain a pointer to a user-defined prompt + * constructor, a default string is built, looking like this: + * + * "Enter {phrase_desc} for {object_name}:" + * + * So, if phrase_desc has the value "pass phrase" and object_name has + * the value "foo.key", the resulting string is: + * + * "Enter pass phrase for foo.key:" +*/ +char *UI_construct_prompt(UI *ui_method, + const char *phrase_desc, const char *object_name); + +/* + * The following function is used to store a pointer to user-specific data. + * Any previous such pointer will be returned and replaced. + * + * For callback purposes, this function makes a lot more sense than using + * ex_data, since the latter requires that different parts of OpenSSL or + * applications share the same ex_data index. + * + * Note that the UI_OpenSSL() method completely ignores the user data. Other + * methods may not, however. + */ +void *UI_add_user_data(UI *ui, void *user_data); +/* + * Alternatively, this function is used to duplicate the user data. + * This uses the duplicator method function. The destroy function will + * be used to free the user data in this case. + */ +int UI_dup_user_data(UI *ui, void *user_data); +/* We need a user data retrieving function as well. */ +void *UI_get0_user_data(UI *ui); + +/* Return the result associated with a prompt given with the index i. */ +const char *UI_get0_result(UI *ui, int i); +int UI_get_result_length(UI *ui, int i); + +/* When all strings have been added, process the whole thing. */ +int UI_process(UI *ui); + +/* + * Give a user interface parameterised control commands. This can be used to + * send down an integer, a data pointer or a function pointer, as well as be + * used to get information from a UI. + */ +int UI_ctrl(UI *ui, int cmd, long i, void *p, void (*f) (void)); + +/* The commands */ +/* + * Use UI_CONTROL_PRINT_ERRORS with the value 1 to have UI_process print the + * OpenSSL error stack before printing any info or added error messages and + * before any prompting. + */ +# define UI_CTRL_PRINT_ERRORS 1 +/* + * Check if a UI_process() is possible to do again with the same instance of + * a user interface. This makes UI_ctrl() return 1 if it is redoable, and 0 + * if not. 
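+ *
+ * Editor's illustrative sketch, not part of the generated header:
+ *
+ *     if (UI_ctrl(ui, UI_CTRL_IS_REDOABLE, 0, NULL, NULL) == 1)
+ *         UI_process(ui);    ...run the same dialogue once more...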
+ */ +# define UI_CTRL_IS_REDOABLE 2 + +/* Some methods may use extra data */ +# define UI_set_app_data(s,arg) UI_set_ex_data(s,0,arg) +# define UI_get_app_data(s) UI_get_ex_data(s,0) + +# define UI_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_UI, l, p, newf, dupf, freef) +int UI_set_ex_data(UI *r, int idx, void *arg); +void *UI_get_ex_data(const UI *r, int idx); + +/* Use specific methods instead of the built-in one */ +void UI_set_default_method(const UI_METHOD *meth); +const UI_METHOD *UI_get_default_method(void); +const UI_METHOD *UI_get_method(UI *ui); +const UI_METHOD *UI_set_method(UI *ui, const UI_METHOD *meth); + +# ifndef OPENSSL_NO_UI_CONSOLE + +/* The method with all the built-in thingies */ +UI_METHOD *UI_OpenSSL(void); + +# endif + +/* + * NULL method. Literally does nothing, but may serve as a placeholder + * to avoid internal default. + */ +const UI_METHOD *UI_null(void); + +/* ---------- For method writers ---------- */ +/*- + A method contains a number of functions that implement the low level + of the User Interface. The functions are: + + an opener This function starts a session, maybe by opening + a channel to a tty, or by opening a window. + a writer This function is called to write a given string, + maybe to the tty, maybe as a field label in a + window. + a flusher This function is called to flush everything that + has been output so far. It can be used to actually + display a dialog box after it has been built. + a reader This function is called to read a given prompt, + maybe from the tty, maybe from a field in a + window. Note that it's called with all string + structures, not only the prompt ones, so it must + check such things itself. + a closer This function closes the session, maybe by closing + the channel to the tty, or closing the window. + + All these functions are expected to return: + + 0 on error. + 1 on success. + -1 on out-of-band events, for example if some prompting has + been canceled (by pressing Ctrl-C, for example). This is + only checked when returned by the flusher or the reader. + + The way this is used, the opener is first called, then the writer for all + strings, then the flusher, then the reader for all strings and finally the + closer. Note that if you want to prompt from a terminal or other command + line interface, the best is to have the reader also write the prompts + instead of having the writer do it. If you want to prompt from a dialog + box, the writer can be used to build up the contents of the box, and the + flusher to actually display the box and run the event loop until all data + has been given, after which the reader only grabs the given data and puts + them back into the UI strings. + + All method functions take a UI as argument. Additionally, the writer and + the reader take a UI_STRING. +*/ + +/* + * The UI_STRING type is the data structure that contains all the needed info + * about a string or a prompt, including test data for a verification prompt. 
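+ *
+ * Editor's illustrative sketch, not part of the generated header: method
+ * writers assemble the pieces described above with the setters declared
+ * further below; my_reader is a hypothetical int (*)(UI *, UI_STRING *):
+ *
+ *     UI_METHOD *meth = UI_create_method("my method");
+ *     UI_method_set_reader(meth, my_reader);
+ *     UI_set_method(ui, meth);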
+ */ +typedef struct ui_string_st UI_STRING; + +SKM_DEFINE_STACK_OF_INTERNAL(UI_STRING, UI_STRING, UI_STRING) +#define sk_UI_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_value(sk, idx) ((UI_STRING *)OPENSSL_sk_value(ossl_check_const_UI_STRING_sk_type(sk), (idx))) +#define sk_UI_STRING_new(cmp) ((STACK_OF(UI_STRING) *)OPENSSL_sk_new(ossl_check_UI_STRING_compfunc_type(cmp))) +#define sk_UI_STRING_new_null() ((STACK_OF(UI_STRING) *)OPENSSL_sk_new_null()) +#define sk_UI_STRING_new_reserve(cmp, n) ((STACK_OF(UI_STRING) *)OPENSSL_sk_new_reserve(ossl_check_UI_STRING_compfunc_type(cmp), (n))) +#define sk_UI_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_UI_STRING_sk_type(sk), (n)) +#define sk_UI_STRING_free(sk) OPENSSL_sk_free(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_delete(sk, i) ((UI_STRING *)OPENSSL_sk_delete(ossl_check_UI_STRING_sk_type(sk), (i))) +#define sk_UI_STRING_delete_ptr(sk, ptr) ((UI_STRING *)OPENSSL_sk_delete_ptr(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr))) +#define sk_UI_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_pop(sk) ((UI_STRING *)OPENSSL_sk_pop(ossl_check_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_shift(sk) ((UI_STRING *)OPENSSL_sk_shift(ossl_check_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_UI_STRING_sk_type(sk),ossl_check_UI_STRING_freefunc_type(freefunc)) +#define sk_UI_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr), (idx)) +#define sk_UI_STRING_set(sk, idx, ptr) ((UI_STRING *)OPENSSL_sk_set(ossl_check_UI_STRING_sk_type(sk), (idx), ossl_check_UI_STRING_type(ptr))) +#define sk_UI_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr), pnum) +#define sk_UI_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_dup(sk) ((STACK_OF(UI_STRING) *)OPENSSL_sk_dup(ossl_check_const_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(UI_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_UI_STRING_sk_type(sk), ossl_check_UI_STRING_copyfunc_type(copyfunc), ossl_check_UI_STRING_freefunc_type(freefunc))) +#define sk_UI_STRING_set_cmp_func(sk, cmp) ((sk_UI_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_compfunc_type(cmp))) + + +/* + * The different types of strings that are currently supported. This is only + * needed by method authors. 
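+ *
+ * Editor's illustrative sketch, not part of the generated header: a custom
+ * reader typically dispatches on this type, e.g.
+ *
+ *     switch (UI_get_string_type(uis)) {
+ *     case UIT_PROMPT:
+ *     case UIT_VERIFY:
+ *         ...collect input, then set it on |uis|, see UI_set_result()...
+ *         break;
+ *     default:
+ *         break;
+ *     }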
+ */ +enum UI_string_types { + UIT_NONE = 0, + UIT_PROMPT, /* Prompt for a string */ + UIT_VERIFY, /* Prompt for a string and verify */ + UIT_BOOLEAN, /* Prompt for a yes/no response */ + UIT_INFO, /* Send info to the user */ + UIT_ERROR /* Send an error message to the user */ +}; + +/* Create and manipulate methods */ +UI_METHOD *UI_create_method(const char *name); +void UI_destroy_method(UI_METHOD *ui_method); +int UI_method_set_opener(UI_METHOD *method, int (*opener) (UI *ui)); +int UI_method_set_writer(UI_METHOD *method, + int (*writer) (UI *ui, UI_STRING *uis)); +int UI_method_set_flusher(UI_METHOD *method, int (*flusher) (UI *ui)); +int UI_method_set_reader(UI_METHOD *method, + int (*reader) (UI *ui, UI_STRING *uis)); +int UI_method_set_closer(UI_METHOD *method, int (*closer) (UI *ui)); +int UI_method_set_data_duplicator(UI_METHOD *method, + void *(*duplicator) (UI *ui, void *ui_data), + void (*destructor)(UI *ui, void *ui_data)); +int UI_method_set_prompt_constructor(UI_METHOD *method, + char *(*prompt_constructor) (UI *ui, + const char + *phrase_desc, + const char + *object_name)); +int UI_method_set_ex_data(UI_METHOD *method, int idx, void *data); +int (*UI_method_get_opener(const UI_METHOD *method)) (UI *); +int (*UI_method_get_writer(const UI_METHOD *method)) (UI *, UI_STRING *); +int (*UI_method_get_flusher(const UI_METHOD *method)) (UI *); +int (*UI_method_get_reader(const UI_METHOD *method)) (UI *, UI_STRING *); +int (*UI_method_get_closer(const UI_METHOD *method)) (UI *); +char *(*UI_method_get_prompt_constructor(const UI_METHOD *method)) + (UI *, const char *, const char *); +void *(*UI_method_get_data_duplicator(const UI_METHOD *method)) (UI *, void *); +void (*UI_method_get_data_destructor(const UI_METHOD *method)) (UI *, void *); +const void *UI_method_get_ex_data(const UI_METHOD *method, int idx); + +/* + * The following functions are helpers for method writers to access relevant + * data from a UI_STRING. + */ + +/* Return type of the UI_STRING */ +enum UI_string_types UI_get_string_type(UI_STRING *uis); +/* Return input flags of the UI_STRING */ +int UI_get_input_flags(UI_STRING *uis); +/* Return the actual string to output (the prompt, info or error) */ +const char *UI_get0_output_string(UI_STRING *uis); +/* + * Return the optional action string to output (the boolean prompt + * instruction) + */ +const char *UI_get0_action_string(UI_STRING *uis); +/* Return the result of a prompt */ +const char *UI_get0_result_string(UI_STRING *uis); +int UI_get_result_string_length(UI_STRING *uis); +/* + * Return the string to test the result against. Only useful with verifies. + */ +const char *UI_get0_test_string(UI_STRING *uis); +/* Return the required minimum size of the result */ +int UI_get_result_minsize(UI_STRING *uis); +/* Return the required maximum size of the result */ +int UI_get_result_maxsize(UI_STRING *uis); +/* Set the result of a UI_STRING. 
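+ *
+ * Editor's illustrative sketch, not part of the generated header: inside a
+ * reader, with |answer| a hypothetical NUL-terminated response; a non-zero
+ * return means the result was rejected (e.g. min/max size constraints):
+ *
+ *     if (UI_set_result(ui, uis, answer) != 0)
+ *         return 0;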
*/ +int UI_set_result(UI *ui, UI_STRING *uis, const char *result); +int UI_set_result_ex(UI *ui, UI_STRING *uis, const char *result, int len); + +/* A couple of popular utility functions */ +int UI_UTIL_read_pw_string(char *buf, int length, const char *prompt, + int verify); +int UI_UTIL_read_pw(char *buf, char *buff, int size, const char *prompt, + int verify); +UI_METHOD *UI_UTIL_wrap_read_pem_callback(pem_password_cb *cb, int rwflag); + + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509.h new file mode 100644 index 00000000000..ac1326330b8 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509.h @@ -0,0 +1,1286 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509_H +# define OPENSSL_X509_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509_H +# endif + +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# include +# include +# endif + +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Needed stacks for types defined in other headers */ +SKM_DEFINE_STACK_OF_INTERNAL(X509_NAME, X509_NAME, X509_NAME) +#define sk_X509_NAME_num(sk) OPENSSL_sk_num(ossl_check_const_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_value(sk, idx) ((X509_NAME *)OPENSSL_sk_value(ossl_check_const_X509_NAME_sk_type(sk), (idx))) +#define sk_X509_NAME_new(cmp) ((STACK_OF(X509_NAME) *)OPENSSL_sk_new(ossl_check_X509_NAME_compfunc_type(cmp))) +#define sk_X509_NAME_new_null() ((STACK_OF(X509_NAME) *)OPENSSL_sk_new_null()) +#define sk_X509_NAME_new_reserve(cmp, n) ((STACK_OF(X509_NAME) *)OPENSSL_sk_new_reserve(ossl_check_X509_NAME_compfunc_type(cmp), (n))) +#define sk_X509_NAME_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_NAME_sk_type(sk), (n)) +#define sk_X509_NAME_free(sk) OPENSSL_sk_free(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_zero(sk) OPENSSL_sk_zero(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_delete(sk, i) ((X509_NAME *)OPENSSL_sk_delete(ossl_check_X509_NAME_sk_type(sk), (i))) +#define sk_X509_NAME_delete_ptr(sk, ptr) ((X509_NAME *)OPENSSL_sk_delete_ptr(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr))) +#define sk_X509_NAME_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_pop(sk) ((X509_NAME *)OPENSSL_sk_pop(ossl_check_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_shift(sk) ((X509_NAME *)OPENSSL_sk_shift(ossl_check_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_NAME_sk_type(sk),ossl_check_X509_NAME_freefunc_type(freefunc)) +#define sk_X509_NAME_insert(sk, ptr, idx) 
OPENSSL_sk_insert(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr), (idx)) +#define sk_X509_NAME_set(sk, idx, ptr) ((X509_NAME *)OPENSSL_sk_set(ossl_check_X509_NAME_sk_type(sk), (idx), ossl_check_X509_NAME_type(ptr))) +#define sk_X509_NAME_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr), pnum) +#define sk_X509_NAME_sort(sk) OPENSSL_sk_sort(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_dup(sk) ((STACK_OF(X509_NAME) *)OPENSSL_sk_dup(ossl_check_const_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_NAME) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_NAME_sk_type(sk), ossl_check_X509_NAME_copyfunc_type(copyfunc), ossl_check_X509_NAME_freefunc_type(freefunc))) +#define sk_X509_NAME_set_cmp_func(sk, cmp) ((sk_X509_NAME_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509, X509, X509) +#define sk_X509_num(sk) OPENSSL_sk_num(ossl_check_const_X509_sk_type(sk)) +#define sk_X509_value(sk, idx) ((X509 *)OPENSSL_sk_value(ossl_check_const_X509_sk_type(sk), (idx))) +#define sk_X509_new(cmp) ((STACK_OF(X509) *)OPENSSL_sk_new(ossl_check_X509_compfunc_type(cmp))) +#define sk_X509_new_null() ((STACK_OF(X509) *)OPENSSL_sk_new_null()) +#define sk_X509_new_reserve(cmp, n) ((STACK_OF(X509) *)OPENSSL_sk_new_reserve(ossl_check_X509_compfunc_type(cmp), (n))) +#define sk_X509_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_sk_type(sk), (n)) +#define sk_X509_free(sk) OPENSSL_sk_free(ossl_check_X509_sk_type(sk)) +#define sk_X509_zero(sk) OPENSSL_sk_zero(ossl_check_X509_sk_type(sk)) +#define sk_X509_delete(sk, i) ((X509 *)OPENSSL_sk_delete(ossl_check_X509_sk_type(sk), (i))) +#define sk_X509_delete_ptr(sk, ptr) ((X509 *)OPENSSL_sk_delete_ptr(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr))) +#define sk_X509_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_pop(sk) ((X509 *)OPENSSL_sk_pop(ossl_check_X509_sk_type(sk))) +#define sk_X509_shift(sk) ((X509 *)OPENSSL_sk_shift(ossl_check_X509_sk_type(sk))) +#define sk_X509_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_sk_type(sk),ossl_check_X509_freefunc_type(freefunc)) +#define sk_X509_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr), (idx)) +#define sk_X509_set(sk, idx, ptr) ((X509 *)OPENSSL_sk_set(ossl_check_X509_sk_type(sk), (idx), ossl_check_X509_type(ptr))) +#define sk_X509_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr), pnum) +#define sk_X509_sort(sk) OPENSSL_sk_sort(ossl_check_X509_sk_type(sk)) +#define sk_X509_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_sk_type(sk)) +#define sk_X509_dup(sk) ((STACK_OF(X509) 
*)OPENSSL_sk_dup(ossl_check_const_X509_sk_type(sk))) +#define sk_X509_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_sk_type(sk), ossl_check_X509_copyfunc_type(copyfunc), ossl_check_X509_freefunc_type(freefunc))) +#define sk_X509_set_cmp_func(sk, cmp) ((sk_X509_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_sk_type(sk), ossl_check_X509_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_REVOKED, X509_REVOKED, X509_REVOKED) +#define sk_X509_REVOKED_num(sk) OPENSSL_sk_num(ossl_check_const_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_value(sk, idx) ((X509_REVOKED *)OPENSSL_sk_value(ossl_check_const_X509_REVOKED_sk_type(sk), (idx))) +#define sk_X509_REVOKED_new(cmp) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new(ossl_check_X509_REVOKED_compfunc_type(cmp))) +#define sk_X509_REVOKED_new_null() ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new_null()) +#define sk_X509_REVOKED_new_reserve(cmp, n) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new_reserve(ossl_check_X509_REVOKED_compfunc_type(cmp), (n))) +#define sk_X509_REVOKED_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_REVOKED_sk_type(sk), (n)) +#define sk_X509_REVOKED_free(sk) OPENSSL_sk_free(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_zero(sk) OPENSSL_sk_zero(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_delete(sk, i) ((X509_REVOKED *)OPENSSL_sk_delete(ossl_check_X509_REVOKED_sk_type(sk), (i))) +#define sk_X509_REVOKED_delete_ptr(sk, ptr) ((X509_REVOKED *)OPENSSL_sk_delete_ptr(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr))) +#define sk_X509_REVOKED_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_pop(sk) ((X509_REVOKED *)OPENSSL_sk_pop(ossl_check_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_shift(sk) ((X509_REVOKED *)OPENSSL_sk_shift(ossl_check_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_REVOKED_sk_type(sk),ossl_check_X509_REVOKED_freefunc_type(freefunc)) +#define sk_X509_REVOKED_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr), (idx)) +#define sk_X509_REVOKED_set(sk, idx, ptr) ((X509_REVOKED *)OPENSSL_sk_set(ossl_check_X509_REVOKED_sk_type(sk), (idx), ossl_check_X509_REVOKED_type(ptr))) +#define sk_X509_REVOKED_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr), pnum) +#define sk_X509_REVOKED_sort(sk) OPENSSL_sk_sort(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_dup(sk) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_dup(ossl_check_const_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_copyfunc_type(copyfunc), ossl_check_X509_REVOKED_freefunc_type(freefunc))) +#define sk_X509_REVOKED_set_cmp_func(sk, cmp) 
((sk_X509_REVOKED_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_CRL, X509_CRL, X509_CRL) +#define sk_X509_CRL_num(sk) OPENSSL_sk_num(ossl_check_const_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_value(sk, idx) ((X509_CRL *)OPENSSL_sk_value(ossl_check_const_X509_CRL_sk_type(sk), (idx))) +#define sk_X509_CRL_new(cmp) ((STACK_OF(X509_CRL) *)OPENSSL_sk_new(ossl_check_X509_CRL_compfunc_type(cmp))) +#define sk_X509_CRL_new_null() ((STACK_OF(X509_CRL) *)OPENSSL_sk_new_null()) +#define sk_X509_CRL_new_reserve(cmp, n) ((STACK_OF(X509_CRL) *)OPENSSL_sk_new_reserve(ossl_check_X509_CRL_compfunc_type(cmp), (n))) +#define sk_X509_CRL_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_CRL_sk_type(sk), (n)) +#define sk_X509_CRL_free(sk) OPENSSL_sk_free(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_zero(sk) OPENSSL_sk_zero(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_delete(sk, i) ((X509_CRL *)OPENSSL_sk_delete(ossl_check_X509_CRL_sk_type(sk), (i))) +#define sk_X509_CRL_delete_ptr(sk, ptr) ((X509_CRL *)OPENSSL_sk_delete_ptr(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr))) +#define sk_X509_CRL_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_pop(sk) ((X509_CRL *)OPENSSL_sk_pop(ossl_check_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_shift(sk) ((X509_CRL *)OPENSSL_sk_shift(ossl_check_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_CRL_sk_type(sk),ossl_check_X509_CRL_freefunc_type(freefunc)) +#define sk_X509_CRL_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr), (idx)) +#define sk_X509_CRL_set(sk, idx, ptr) ((X509_CRL *)OPENSSL_sk_set(ossl_check_X509_CRL_sk_type(sk), (idx), ossl_check_X509_CRL_type(ptr))) +#define sk_X509_CRL_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr), pnum) +#define sk_X509_CRL_sort(sk) OPENSSL_sk_sort(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_dup(sk) ((STACK_OF(X509_CRL) *)OPENSSL_sk_dup(ossl_check_const_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_CRL) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_CRL_sk_type(sk), ossl_check_X509_CRL_copyfunc_type(copyfunc), ossl_check_X509_CRL_freefunc_type(freefunc))) +#define sk_X509_CRL_set_cmp_func(sk, cmp) ((sk_X509_CRL_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_compfunc_type(cmp))) + + +/* Flags for X509_get_signature_info() */ +/* Signature info is valid */ +# define X509_SIG_INFO_VALID 0x1 +/* Signature is suitable for TLS use */ +# define X509_SIG_INFO_TLS 0x2 + +# define X509_FILETYPE_PEM 1 +# define X509_FILETYPE_ASN1 2 +# define X509_FILETYPE_DEFAULT 3 + +# define X509v3_KU_DIGITAL_SIGNATURE 0x0080 +# define X509v3_KU_NON_REPUDIATION 0x0040 +# define X509v3_KU_KEY_ENCIPHERMENT 0x0020 +# define X509v3_KU_DATA_ENCIPHERMENT 0x0010 +# define 
X509v3_KU_KEY_AGREEMENT 0x0008 +# define X509v3_KU_KEY_CERT_SIGN 0x0004 +# define X509v3_KU_CRL_SIGN 0x0002 +# define X509v3_KU_ENCIPHER_ONLY 0x0001 +# define X509v3_KU_DECIPHER_ONLY 0x8000 +# define X509v3_KU_UNDEF 0xffff + +struct X509_algor_st { + ASN1_OBJECT *algorithm; + ASN1_TYPE *parameter; +} /* X509_ALGOR */ ; + +typedef STACK_OF(X509_ALGOR) X509_ALGORS; + +typedef struct X509_val_st { + ASN1_TIME *notBefore; + ASN1_TIME *notAfter; +} X509_VAL; + +typedef struct X509_sig_st X509_SIG; + +typedef struct X509_name_entry_st X509_NAME_ENTRY; + +SKM_DEFINE_STACK_OF_INTERNAL(X509_NAME_ENTRY, X509_NAME_ENTRY, X509_NAME_ENTRY) +#define sk_X509_NAME_ENTRY_num(sk) OPENSSL_sk_num(ossl_check_const_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_value(sk, idx) ((X509_NAME_ENTRY *)OPENSSL_sk_value(ossl_check_const_X509_NAME_ENTRY_sk_type(sk), (idx))) +#define sk_X509_NAME_ENTRY_new(cmp) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new(ossl_check_X509_NAME_ENTRY_compfunc_type(cmp))) +#define sk_X509_NAME_ENTRY_new_null() ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new_null()) +#define sk_X509_NAME_ENTRY_new_reserve(cmp, n) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new_reserve(ossl_check_X509_NAME_ENTRY_compfunc_type(cmp), (n))) +#define sk_X509_NAME_ENTRY_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_NAME_ENTRY_sk_type(sk), (n)) +#define sk_X509_NAME_ENTRY_free(sk) OPENSSL_sk_free(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_zero(sk) OPENSSL_sk_zero(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_delete(sk, i) ((X509_NAME_ENTRY *)OPENSSL_sk_delete(ossl_check_X509_NAME_ENTRY_sk_type(sk), (i))) +#define sk_X509_NAME_ENTRY_delete_ptr(sk, ptr) ((X509_NAME_ENTRY *)OPENSSL_sk_delete_ptr(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr))) +#define sk_X509_NAME_ENTRY_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_pop(sk) ((X509_NAME_ENTRY *)OPENSSL_sk_pop(ossl_check_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_shift(sk) ((X509_NAME_ENTRY *)OPENSSL_sk_shift(ossl_check_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_NAME_ENTRY_sk_type(sk),ossl_check_X509_NAME_ENTRY_freefunc_type(freefunc)) +#define sk_X509_NAME_ENTRY_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr), (idx)) +#define sk_X509_NAME_ENTRY_set(sk, idx, ptr) ((X509_NAME_ENTRY *)OPENSSL_sk_set(ossl_check_X509_NAME_ENTRY_sk_type(sk), (idx), ossl_check_X509_NAME_ENTRY_type(ptr))) +#define sk_X509_NAME_ENTRY_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr), pnum) +#define sk_X509_NAME_ENTRY_sort(sk) OPENSSL_sk_sort(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_dup(sk) ((STACK_OF(X509_NAME_ENTRY) 
*)OPENSSL_sk_dup(ossl_check_const_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_copyfunc_type(copyfunc), ossl_check_X509_NAME_ENTRY_freefunc_type(freefunc))) +#define sk_X509_NAME_ENTRY_set_cmp_func(sk, cmp) ((sk_X509_NAME_ENTRY_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_compfunc_type(cmp))) + + +# define X509_EX_V_NETSCAPE_HACK 0x8000 +# define X509_EX_V_INIT 0x0001 +typedef struct X509_extension_st X509_EXTENSION; +SKM_DEFINE_STACK_OF_INTERNAL(X509_EXTENSION, X509_EXTENSION, X509_EXTENSION) +#define sk_X509_EXTENSION_num(sk) OPENSSL_sk_num(ossl_check_const_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_value(sk, idx) ((X509_EXTENSION *)OPENSSL_sk_value(ossl_check_const_X509_EXTENSION_sk_type(sk), (idx))) +#define sk_X509_EXTENSION_new(cmp) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new(ossl_check_X509_EXTENSION_compfunc_type(cmp))) +#define sk_X509_EXTENSION_new_null() ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new_null()) +#define sk_X509_EXTENSION_new_reserve(cmp, n) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new_reserve(ossl_check_X509_EXTENSION_compfunc_type(cmp), (n))) +#define sk_X509_EXTENSION_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_EXTENSION_sk_type(sk), (n)) +#define sk_X509_EXTENSION_free(sk) OPENSSL_sk_free(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_zero(sk) OPENSSL_sk_zero(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_delete(sk, i) ((X509_EXTENSION *)OPENSSL_sk_delete(ossl_check_X509_EXTENSION_sk_type(sk), (i))) +#define sk_X509_EXTENSION_delete_ptr(sk, ptr) ((X509_EXTENSION *)OPENSSL_sk_delete_ptr(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr))) +#define sk_X509_EXTENSION_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_pop(sk) ((X509_EXTENSION *)OPENSSL_sk_pop(ossl_check_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_shift(sk) ((X509_EXTENSION *)OPENSSL_sk_shift(ossl_check_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_EXTENSION_sk_type(sk),ossl_check_X509_EXTENSION_freefunc_type(freefunc)) +#define sk_X509_EXTENSION_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr), (idx)) +#define sk_X509_EXTENSION_set(sk, idx, ptr) ((X509_EXTENSION *)OPENSSL_sk_set(ossl_check_X509_EXTENSION_sk_type(sk), (idx), ossl_check_X509_EXTENSION_type(ptr))) +#define sk_X509_EXTENSION_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr), pnum) +#define sk_X509_EXTENSION_sort(sk) OPENSSL_sk_sort(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_dup(sk) ((STACK_OF(X509_EXTENSION) 
*)OPENSSL_sk_dup(ossl_check_const_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_copyfunc_type(copyfunc), ossl_check_X509_EXTENSION_freefunc_type(freefunc))) +#define sk_X509_EXTENSION_set_cmp_func(sk, cmp) ((sk_X509_EXTENSION_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_compfunc_type(cmp))) + +typedef STACK_OF(X509_EXTENSION) X509_EXTENSIONS; +typedef struct x509_attributes_st X509_ATTRIBUTE; +SKM_DEFINE_STACK_OF_INTERNAL(X509_ATTRIBUTE, X509_ATTRIBUTE, X509_ATTRIBUTE) +#define sk_X509_ATTRIBUTE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_value(sk, idx) ((X509_ATTRIBUTE *)OPENSSL_sk_value(ossl_check_const_X509_ATTRIBUTE_sk_type(sk), (idx))) +#define sk_X509_ATTRIBUTE_new(cmp) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new(ossl_check_X509_ATTRIBUTE_compfunc_type(cmp))) +#define sk_X509_ATTRIBUTE_new_null() ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new_null()) +#define sk_X509_ATTRIBUTE_new_reserve(cmp, n) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new_reserve(ossl_check_X509_ATTRIBUTE_compfunc_type(cmp), (n))) +#define sk_X509_ATTRIBUTE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_ATTRIBUTE_sk_type(sk), (n)) +#define sk_X509_ATTRIBUTE_free(sk) OPENSSL_sk_free(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_delete(sk, i) ((X509_ATTRIBUTE *)OPENSSL_sk_delete(ossl_check_X509_ATTRIBUTE_sk_type(sk), (i))) +#define sk_X509_ATTRIBUTE_delete_ptr(sk, ptr) ((X509_ATTRIBUTE *)OPENSSL_sk_delete_ptr(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr))) +#define sk_X509_ATTRIBUTE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_pop(sk) ((X509_ATTRIBUTE *)OPENSSL_sk_pop(ossl_check_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_shift(sk) ((X509_ATTRIBUTE *)OPENSSL_sk_shift(ossl_check_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_ATTRIBUTE_sk_type(sk),ossl_check_X509_ATTRIBUTE_freefunc_type(freefunc)) +#define sk_X509_ATTRIBUTE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr), (idx)) +#define sk_X509_ATTRIBUTE_set(sk, idx, ptr) ((X509_ATTRIBUTE *)OPENSSL_sk_set(ossl_check_X509_ATTRIBUTE_sk_type(sk), (idx), ossl_check_X509_ATTRIBUTE_type(ptr))) +#define sk_X509_ATTRIBUTE_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr), pnum) +#define sk_X509_ATTRIBUTE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_dup(sk) ((STACK_OF(X509_ATTRIBUTE) 
*)OPENSSL_sk_dup(ossl_check_const_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_copyfunc_type(copyfunc), ossl_check_X509_ATTRIBUTE_freefunc_type(freefunc))) +#define sk_X509_ATTRIBUTE_set_cmp_func(sk, cmp) ((sk_X509_ATTRIBUTE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_compfunc_type(cmp))) + +typedef struct X509_req_info_st X509_REQ_INFO; +typedef struct X509_req_st X509_REQ; +typedef struct x509_cert_aux_st X509_CERT_AUX; +typedef struct x509_cinf_st X509_CINF; + +/* Flags for X509_print_ex() */ + +# define X509_FLAG_COMPAT 0 +# define X509_FLAG_NO_HEADER 1L +# define X509_FLAG_NO_VERSION (1L << 1) +# define X509_FLAG_NO_SERIAL (1L << 2) +# define X509_FLAG_NO_SIGNAME (1L << 3) +# define X509_FLAG_NO_ISSUER (1L << 4) +# define X509_FLAG_NO_VALIDITY (1L << 5) +# define X509_FLAG_NO_SUBJECT (1L << 6) +# define X509_FLAG_NO_PUBKEY (1L << 7) +# define X509_FLAG_NO_EXTENSIONS (1L << 8) +# define X509_FLAG_NO_SIGDUMP (1L << 9) +# define X509_FLAG_NO_AUX (1L << 10) +# define X509_FLAG_NO_ATTRIBUTES (1L << 11) +# define X509_FLAG_NO_IDS (1L << 12) +# define X509_FLAG_EXTENSIONS_ONLY_KID (1L << 13) + +/* Flags specific to X509_NAME_print_ex() */ + +/* The field separator information */ + +# define XN_FLAG_SEP_MASK (0xf << 16) + +# define XN_FLAG_COMPAT 0/* Traditional; use old X509_NAME_print */ +# define XN_FLAG_SEP_COMMA_PLUS (1 << 16)/* RFC2253 ,+ */ +# define XN_FLAG_SEP_CPLUS_SPC (2 << 16)/* ,+ spaced: more readable */ +# define XN_FLAG_SEP_SPLUS_SPC (3 << 16)/* ;+ spaced */ +# define XN_FLAG_SEP_MULTILINE (4 << 16)/* One line per field */ + +# define XN_FLAG_DN_REV (1 << 20)/* Reverse DN order */ + +/* How the field name is shown */ + +# define XN_FLAG_FN_MASK (0x3 << 21) + +# define XN_FLAG_FN_SN 0/* Object short name */ +# define XN_FLAG_FN_LN (1 << 21)/* Object long name */ +# define XN_FLAG_FN_OID (2 << 21)/* Always use OIDs */ +# define XN_FLAG_FN_NONE (3 << 21)/* No field names */ + +# define XN_FLAG_SPC_EQ (1 << 23)/* Put spaces round '=' */ + +/* + * This determines if we dump fields we don't recognise: RFC2253 requires + * this. 
+ */ + +# define XN_FLAG_DUMP_UNKNOWN_FIELDS (1 << 24) + +# define XN_FLAG_FN_ALIGN (1 << 25)/* Align field names to 20 + * characters */ + +/* Complete set of RFC2253 flags */ + +# define XN_FLAG_RFC2253 (ASN1_STRFLGS_RFC2253 | \ + XN_FLAG_SEP_COMMA_PLUS | \ + XN_FLAG_DN_REV | \ + XN_FLAG_FN_SN | \ + XN_FLAG_DUMP_UNKNOWN_FIELDS) + +/* readable oneline form */ + +# define XN_FLAG_ONELINE (ASN1_STRFLGS_RFC2253 | \ + ASN1_STRFLGS_ESC_QUOTE | \ + XN_FLAG_SEP_CPLUS_SPC | \ + XN_FLAG_SPC_EQ | \ + XN_FLAG_FN_SN) + +/* readable multiline form */ + +# define XN_FLAG_MULTILINE (ASN1_STRFLGS_ESC_CTRL | \ + ASN1_STRFLGS_ESC_MSB | \ + XN_FLAG_SEP_MULTILINE | \ + XN_FLAG_SPC_EQ | \ + XN_FLAG_FN_LN | \ + XN_FLAG_FN_ALIGN) + +typedef struct X509_crl_info_st X509_CRL_INFO; + +typedef struct private_key_st { + int version; + /* The PKCS#8 data types */ + X509_ALGOR *enc_algor; + ASN1_OCTET_STRING *enc_pkey; /* encrypted pub key */ + /* When decrypted, the following will not be NULL */ + EVP_PKEY *dec_pkey; + /* used to encrypt and decrypt */ + int key_length; + char *key_data; + int key_free; /* true if we should auto free key_data */ + /* expanded version of 'enc_algor' */ + EVP_CIPHER_INFO cipher; +} X509_PKEY; + +typedef struct X509_info_st { + X509 *x509; + X509_CRL *crl; + X509_PKEY *x_pkey; + EVP_CIPHER_INFO enc_cipher; + int enc_len; + char *enc_data; +} X509_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(X509_INFO, X509_INFO, X509_INFO) +#define sk_X509_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_value(sk, idx) ((X509_INFO *)OPENSSL_sk_value(ossl_check_const_X509_INFO_sk_type(sk), (idx))) +#define sk_X509_INFO_new(cmp) ((STACK_OF(X509_INFO) *)OPENSSL_sk_new(ossl_check_X509_INFO_compfunc_type(cmp))) +#define sk_X509_INFO_new_null() ((STACK_OF(X509_INFO) *)OPENSSL_sk_new_null()) +#define sk_X509_INFO_new_reserve(cmp, n) ((STACK_OF(X509_INFO) *)OPENSSL_sk_new_reserve(ossl_check_X509_INFO_compfunc_type(cmp), (n))) +#define sk_X509_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_INFO_sk_type(sk), (n)) +#define sk_X509_INFO_free(sk) OPENSSL_sk_free(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_delete(sk, i) ((X509_INFO *)OPENSSL_sk_delete(ossl_check_X509_INFO_sk_type(sk), (i))) +#define sk_X509_INFO_delete_ptr(sk, ptr) ((X509_INFO *)OPENSSL_sk_delete_ptr(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr))) +#define sk_X509_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_pop(sk) ((X509_INFO *)OPENSSL_sk_pop(ossl_check_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_shift(sk) ((X509_INFO *)OPENSSL_sk_shift(ossl_check_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_INFO_sk_type(sk),ossl_check_X509_INFO_freefunc_type(freefunc)) +#define sk_X509_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr), (idx)) +#define sk_X509_INFO_set(sk, idx, ptr) ((X509_INFO *)OPENSSL_sk_set(ossl_check_X509_INFO_sk_type(sk), (idx), ossl_check_X509_INFO_type(ptr))) +#define sk_X509_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_INFO_sk_type(sk), 
ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr), pnum) +#define sk_X509_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_dup(sk) ((STACK_OF(X509_INFO) *)OPENSSL_sk_dup(ossl_check_const_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_INFO_sk_type(sk), ossl_check_X509_INFO_copyfunc_type(copyfunc), ossl_check_X509_INFO_freefunc_type(freefunc))) +#define sk_X509_INFO_set_cmp_func(sk, cmp) ((sk_X509_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_compfunc_type(cmp))) + + +/* + * The next 2 structures and their 8 routines are used to manipulate Netscape's + * spki structures - useful if you are writing a CA web page + */ +typedef struct Netscape_spkac_st { + X509_PUBKEY *pubkey; + ASN1_IA5STRING *challenge; /* challenge sent in atlas >= PR2 */ +} NETSCAPE_SPKAC; + +typedef struct Netscape_spki_st { + NETSCAPE_SPKAC *spkac; /* signed public key and challenge */ + X509_ALGOR sig_algor; + ASN1_BIT_STRING *signature; +} NETSCAPE_SPKI; + +/* Netscape certificate sequence structure */ +typedef struct Netscape_certificate_sequence { + ASN1_OBJECT *type; + STACK_OF(X509) *certs; +} NETSCAPE_CERT_SEQUENCE; + +/*- Unused (and iv length is wrong) +typedef struct CBCParameter_st + { + unsigned char iv[8]; + } CBC_PARAM; +*/ + +/* Password based encryption structure */ + +typedef struct PBEPARAM_st { + ASN1_OCTET_STRING *salt; + ASN1_INTEGER *iter; +} PBEPARAM; + +/* Password based encryption V2 structures */ + +typedef struct PBE2PARAM_st { + X509_ALGOR *keyfunc; + X509_ALGOR *encryption; +} PBE2PARAM; + +typedef struct PBKDF2PARAM_st { +/* Usually OCTET STRING but could be anything */ + ASN1_TYPE *salt; + ASN1_INTEGER *iter; + ASN1_INTEGER *keylength; + X509_ALGOR *prf; +} PBKDF2PARAM; + +#ifndef OPENSSL_NO_SCRYPT +typedef struct SCRYPT_PARAMS_st { + ASN1_OCTET_STRING *salt; + ASN1_INTEGER *costParameter; + ASN1_INTEGER *blockSize; + ASN1_INTEGER *parallelizationParameter; + ASN1_INTEGER *keyLength; +} SCRYPT_PARAMS; +#endif + +#ifdef __cplusplus +} +#endif + +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +# define X509_EXT_PACK_UNKNOWN 1 +# define X509_EXT_PACK_STRING 2 + +# define X509_extract_key(x) X509_get_pubkey(x)/*****/ +# define X509_REQ_extract_key(a) X509_REQ_get_pubkey(a) +# define X509_name_cmp(a,b) X509_NAME_cmp((a),(b)) + +void X509_CRL_set_default_method(const X509_CRL_METHOD *meth); +X509_CRL_METHOD *X509_CRL_METHOD_new(int (*crl_init) (X509_CRL *crl), + int (*crl_free) (X509_CRL *crl), + int (*crl_lookup) (X509_CRL *crl, + X509_REVOKED **ret, + const + ASN1_INTEGER *serial, + const + X509_NAME *issuer), + int (*crl_verify) (X509_CRL *crl, + EVP_PKEY *pk)); +void X509_CRL_METHOD_free(X509_CRL_METHOD *m); + +void X509_CRL_set_meth_data(X509_CRL *crl, void *dat); +void *X509_CRL_get_meth_data(X509_CRL *crl); + +const char *X509_verify_cert_error_string(long n); + +int X509_verify(X509 *a, EVP_PKEY *r); +int X509_self_signed(X509 *cert, int verify_signature); + +int X509_REQ_verify_ex(X509_REQ *a, EVP_PKEY *r, OSSL_LIB_CTX *libctx, + const char *propq); +int X509_REQ_verify(X509_REQ *a, EVP_PKEY *r); +int X509_CRL_verify(X509_CRL *a, EVP_PKEY *r); +int NETSCAPE_SPKI_verify(NETSCAPE_SPKI 
*a, EVP_PKEY *r); + +NETSCAPE_SPKI *NETSCAPE_SPKI_b64_decode(const char *str, int len); +char *NETSCAPE_SPKI_b64_encode(NETSCAPE_SPKI *x); +EVP_PKEY *NETSCAPE_SPKI_get_pubkey(NETSCAPE_SPKI *x); +int NETSCAPE_SPKI_set_pubkey(NETSCAPE_SPKI *x, EVP_PKEY *pkey); + +int NETSCAPE_SPKI_print(BIO *out, NETSCAPE_SPKI *spki); + +int X509_signature_dump(BIO *bp, const ASN1_STRING *sig, int indent); +int X509_signature_print(BIO *bp, const X509_ALGOR *alg, + const ASN1_STRING *sig); + +int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx); +int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx); +int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx); +int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md); + +int X509_pubkey_digest(const X509 *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_digest(const X509 *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +ASN1_OCTET_STRING *X509_digest_sig(const X509 *cert, + EVP_MD **md_used, int *md_is_fallback); +int X509_CRL_digest(const X509_CRL *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_REQ_digest(const X509_REQ *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_NAME_digest(const X509_NAME *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); + +X509 *X509_load_http(const char *url, BIO *bio, BIO *rbio, int timeout); +X509_CRL *X509_CRL_load_http(const char *url, BIO *bio, BIO *rbio, int timeout); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# include /* OSSL_HTTP_REQ_CTX_nbio_d2i */ +# define X509_http_nbio(rctx, pcert) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(rctx, pcert, ASN1_ITEM_rptr(X509)) +# define X509_CRL_http_nbio(rctx, pcrl) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(rctx, pcrl, ASN1_ITEM_rptr(X509_CRL)) +# endif + +# ifndef OPENSSL_NO_STDIO +X509 *d2i_X509_fp(FILE *fp, X509 **x509); +int i2d_X509_fp(FILE *fp, const X509 *x509); +X509_CRL *d2i_X509_CRL_fp(FILE *fp, X509_CRL **crl); +int i2d_X509_CRL_fp(FILE *fp, const X509_CRL *crl); +X509_REQ *d2i_X509_REQ_fp(FILE *fp, X509_REQ **req); +int i2d_X509_REQ_fp(FILE *fp, const X509_REQ *req); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPrivateKey_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPrivateKey_fp(FILE *fp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPublicKey_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPublicKey_fp(FILE *fp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSA_PUBKEY_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSA_PUBKEY_fp(FILE *fp, const RSA *rsa); +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSA_PUBKEY_fp(FILE *fp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSA_PUBKEY_fp(FILE *fp, const DSA *dsa); +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSAPrivateKey_fp(FILE *fp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSAPrivateKey_fp(FILE *fp, const DSA *dsa); +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_EC_PUBKEY_fp(FILE *fp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_EC_PUBKEY_fp(FILE *fp, const EC_KEY *eckey); +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_ECPrivateKey_fp(FILE *fp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_ECPrivateKey_fp(FILE *fp, const EC_KEY *eckey); +# endif /* OPENSSL_NO_EC */ +# endif /* 
OPENSSL_NO_DEPRECATED_3_0 */ +X509_SIG *d2i_PKCS8_fp(FILE *fp, X509_SIG **p8); +int i2d_PKCS8_fp(FILE *fp, const X509_SIG *p8); +X509_PUBKEY *d2i_X509_PUBKEY_fp(FILE *fp, X509_PUBKEY **xpk); +int i2d_X509_PUBKEY_fp(FILE *fp, const X509_PUBKEY *xpk); +PKCS8_PRIV_KEY_INFO *d2i_PKCS8_PRIV_KEY_INFO_fp(FILE *fp, + PKCS8_PRIV_KEY_INFO **p8inf); +int i2d_PKCS8_PRIV_KEY_INFO_fp(FILE *fp, const PKCS8_PRIV_KEY_INFO *p8inf); +int i2d_PKCS8PrivateKeyInfo_fp(FILE *fp, const EVP_PKEY *key); +int i2d_PrivateKey_fp(FILE *fp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PrivateKey_ex_fp(FILE *fp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PrivateKey_fp(FILE *fp, EVP_PKEY **a); +int i2d_PUBKEY_fp(FILE *fp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PUBKEY_ex_fp(FILE *fp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PUBKEY_fp(FILE *fp, EVP_PKEY **a); +# endif + +X509 *d2i_X509_bio(BIO *bp, X509 **x509); +int i2d_X509_bio(BIO *bp, const X509 *x509); +X509_CRL *d2i_X509_CRL_bio(BIO *bp, X509_CRL **crl); +int i2d_X509_CRL_bio(BIO *bp, const X509_CRL *crl); +X509_REQ *d2i_X509_REQ_bio(BIO *bp, X509_REQ **req); +int i2d_X509_REQ_bio(BIO *bp, const X509_REQ *req); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPrivateKey_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPrivateKey_bio(BIO *bp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPublicKey_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPublicKey_bio(BIO *bp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSA_PUBKEY_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSA_PUBKEY_bio(BIO *bp, const RSA *rsa); +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSA_PUBKEY_bio(BIO *bp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSA_PUBKEY_bio(BIO *bp, const DSA *dsa); +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSAPrivateKey_bio(BIO *bp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSAPrivateKey_bio(BIO *bp, const DSA *dsa); +# endif +# endif + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_EC_PUBKEY_bio(BIO *bp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_EC_PUBKEY_bio(BIO *bp, const EC_KEY *eckey); +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_ECPrivateKey_bio(BIO *bp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_ECPrivateKey_bio(BIO *bp, const EC_KEY *eckey); +# endif /* OPENSSL_NO_EC */ +# endif /* OPENSSL_NO_DEPRECATED_3_0 */ + +X509_SIG *d2i_PKCS8_bio(BIO *bp, X509_SIG **p8); +int i2d_PKCS8_bio(BIO *bp, const X509_SIG *p8); +X509_PUBKEY *d2i_X509_PUBKEY_bio(BIO *bp, X509_PUBKEY **xpk); +int i2d_X509_PUBKEY_bio(BIO *bp, const X509_PUBKEY *xpk); +PKCS8_PRIV_KEY_INFO *d2i_PKCS8_PRIV_KEY_INFO_bio(BIO *bp, + PKCS8_PRIV_KEY_INFO **p8inf); +int i2d_PKCS8_PRIV_KEY_INFO_bio(BIO *bp, const PKCS8_PRIV_KEY_INFO *p8inf); +int i2d_PKCS8PrivateKeyInfo_bio(BIO *bp, const EVP_PKEY *key); +int i2d_PrivateKey_bio(BIO *bp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PrivateKey_ex_bio(BIO *bp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PrivateKey_bio(BIO *bp, EVP_PKEY **a); +int i2d_PUBKEY_bio(BIO *bp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PUBKEY_ex_bio(BIO *bp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PUBKEY_bio(BIO *bp, EVP_PKEY **a); + +DECLARE_ASN1_DUP_FUNCTION(X509) +DECLARE_ASN1_DUP_FUNCTION(X509_ALGOR) +DECLARE_ASN1_DUP_FUNCTION(X509_ATTRIBUTE) +DECLARE_ASN1_DUP_FUNCTION(X509_CRL) +DECLARE_ASN1_DUP_FUNCTION(X509_EXTENSION) 
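/*
 * Illustrative sketch (editor's example, not part of the generated
 * header): reading a DER-encoded certificate from a file with the
 * d2i_X509_bio() routine declared above. Error handling is minimal and
 * the path parameter is a placeholder; a real caller would also report
 * the OpenSSL error queue on failure.
 */
#include <openssl/bio.h>
#include <openssl/x509.h>

static X509 *load_der_cert(const char *path)
{
    BIO *in = BIO_new_file(path, "rb");   /* binary read */
    X509 *cert = NULL;

    if (in != NULL) {
        cert = d2i_X509_bio(in, NULL);    /* NULL: allocate a fresh X509 */
        BIO_free(in);
    }
    return cert;                          /* NULL on any failure */
}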
+DECLARE_ASN1_DUP_FUNCTION(X509_PUBKEY) +DECLARE_ASN1_DUP_FUNCTION(X509_REQ) +DECLARE_ASN1_DUP_FUNCTION(X509_REVOKED) +int X509_ALGOR_set0(X509_ALGOR *alg, ASN1_OBJECT *aobj, int ptype, + void *pval); +void X509_ALGOR_get0(const ASN1_OBJECT **paobj, int *pptype, + const void **ppval, const X509_ALGOR *algor); +void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); +int X509_ALGOR_cmp(const X509_ALGOR *a, const X509_ALGOR *b); +int X509_ALGOR_copy(X509_ALGOR *dest, const X509_ALGOR *src); + +DECLARE_ASN1_DUP_FUNCTION(X509_NAME) +DECLARE_ASN1_DUP_FUNCTION(X509_NAME_ENTRY) + +int X509_cmp_time(const ASN1_TIME *s, time_t *t); +int X509_cmp_current_time(const ASN1_TIME *s); +int X509_cmp_timeframe(const X509_VERIFY_PARAM *vpm, + const ASN1_TIME *start, const ASN1_TIME *end); +ASN1_TIME *X509_time_adj(ASN1_TIME *s, long adj, time_t *t); +ASN1_TIME *X509_time_adj_ex(ASN1_TIME *s, + int offset_day, long offset_sec, time_t *t); +ASN1_TIME *X509_gmtime_adj(ASN1_TIME *s, long adj); + +const char *X509_get_default_cert_area(void); +const char *X509_get_default_cert_dir(void); +const char *X509_get_default_cert_file(void); +const char *X509_get_default_cert_dir_env(void); +const char *X509_get_default_cert_file_env(void); +const char *X509_get_default_private_dir(void); + +X509_REQ *X509_to_X509_REQ(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); +X509 *X509_REQ_to_X509(X509_REQ *r, int days, EVP_PKEY *pkey); + +DECLARE_ASN1_FUNCTIONS(X509_ALGOR) +DECLARE_ASN1_ENCODE_FUNCTIONS(X509_ALGORS, X509_ALGORS, X509_ALGORS) +DECLARE_ASN1_FUNCTIONS(X509_VAL) + +DECLARE_ASN1_FUNCTIONS(X509_PUBKEY) + +X509_PUBKEY *X509_PUBKEY_new_ex(OSSL_LIB_CTX *libctx, const char *propq); +int X509_PUBKEY_set(X509_PUBKEY **x, EVP_PKEY *pkey); +EVP_PKEY *X509_PUBKEY_get0(const X509_PUBKEY *key); +EVP_PKEY *X509_PUBKEY_get(const X509_PUBKEY *key); +int X509_get_pubkey_parameters(EVP_PKEY *pkey, STACK_OF(X509) *chain); +long X509_get_pathlen(X509 *x); +DECLARE_ASN1_ENCODE_FUNCTIONS_only(EVP_PKEY, PUBKEY) +EVP_PKEY *d2i_PUBKEY_ex(EVP_PKEY **a, const unsigned char **pp, long length, + OSSL_LIB_CTX *libctx, const char *propq); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0,RSA, RSA_PUBKEY) +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0,DSA, DSA_PUBKEY) +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0, EC_KEY, EC_PUBKEY) +# endif +# endif + +DECLARE_ASN1_FUNCTIONS(X509_SIG) +void X509_SIG_get0(const X509_SIG *sig, const X509_ALGOR **palg, + const ASN1_OCTET_STRING **pdigest); +void X509_SIG_getm(X509_SIG *sig, X509_ALGOR **palg, + ASN1_OCTET_STRING **pdigest); + +DECLARE_ASN1_FUNCTIONS(X509_REQ_INFO) +DECLARE_ASN1_FUNCTIONS(X509_REQ) +X509_REQ *X509_REQ_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +DECLARE_ASN1_FUNCTIONS(X509_ATTRIBUTE) +X509_ATTRIBUTE *X509_ATTRIBUTE_create(int nid, int atrtype, void *value); + +DECLARE_ASN1_FUNCTIONS(X509_EXTENSION) +DECLARE_ASN1_ENCODE_FUNCTIONS(X509_EXTENSIONS, X509_EXTENSIONS, X509_EXTENSIONS) + +DECLARE_ASN1_FUNCTIONS(X509_NAME_ENTRY) + +DECLARE_ASN1_FUNCTIONS(X509_NAME) + +int X509_NAME_set(X509_NAME **xn, const X509_NAME *name); + +DECLARE_ASN1_FUNCTIONS(X509_CINF) +DECLARE_ASN1_FUNCTIONS(X509) +X509 *X509_new_ex(OSSL_LIB_CTX *libctx, const char *propq); +DECLARE_ASN1_FUNCTIONS(X509_CERT_AUX) + +#define X509_get_ex_new_index(l, p, newf, dupf, freef) \ + 
CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509, l, p, newf, dupf, freef) +int X509_set_ex_data(X509 *r, int idx, void *arg); +void *X509_get_ex_data(const X509 *r, int idx); +DECLARE_ASN1_ENCODE_FUNCTIONS_only(X509,X509_AUX) + +int i2d_re_X509_tbs(X509 *x, unsigned char **pp); + +int X509_SIG_INFO_get(const X509_SIG_INFO *siginf, int *mdnid, int *pknid, + int *secbits, uint32_t *flags); +void X509_SIG_INFO_set(X509_SIG_INFO *siginf, int mdnid, int pknid, + int secbits, uint32_t flags); + +int X509_get_signature_info(X509 *x, int *mdnid, int *pknid, int *secbits, + uint32_t *flags); + +void X509_get0_signature(const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg, const X509 *x); +int X509_get_signature_nid(const X509 *x); + +void X509_set0_distinguishing_id(X509 *x, ASN1_OCTET_STRING *d_id); +ASN1_OCTET_STRING *X509_get0_distinguishing_id(X509 *x); +void X509_REQ_set0_distinguishing_id(X509_REQ *x, ASN1_OCTET_STRING *d_id); +ASN1_OCTET_STRING *X509_REQ_get0_distinguishing_id(X509_REQ *x); + +int X509_alias_set1(X509 *x, const unsigned char *name, int len); +int X509_keyid_set1(X509 *x, const unsigned char *id, int len); +unsigned char *X509_alias_get0(X509 *x, int *len); +unsigned char *X509_keyid_get0(X509 *x, int *len); + +DECLARE_ASN1_FUNCTIONS(X509_REVOKED) +DECLARE_ASN1_FUNCTIONS(X509_CRL_INFO) +DECLARE_ASN1_FUNCTIONS(X509_CRL) +X509_CRL *X509_CRL_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +int X509_CRL_add0_revoked(X509_CRL *crl, X509_REVOKED *rev); +int X509_CRL_get0_by_serial(X509_CRL *crl, + X509_REVOKED **ret, const ASN1_INTEGER *serial); +int X509_CRL_get0_by_cert(X509_CRL *crl, X509_REVOKED **ret, X509 *x); + +X509_PKEY *X509_PKEY_new(void); +void X509_PKEY_free(X509_PKEY *a); + +DECLARE_ASN1_FUNCTIONS(NETSCAPE_SPKI) +DECLARE_ASN1_FUNCTIONS(NETSCAPE_SPKAC) +DECLARE_ASN1_FUNCTIONS(NETSCAPE_CERT_SEQUENCE) + +X509_INFO *X509_INFO_new(void); +void X509_INFO_free(X509_INFO *a); +char *X509_NAME_oneline(const X509_NAME *a, char *buf, int size); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +int ASN1_verify(i2d_of_void *i2d, X509_ALGOR *algor1, + ASN1_BIT_STRING *signature, char *data, EVP_PKEY *pkey); +OSSL_DEPRECATEDIN_3_0 +int ASN1_digest(i2d_of_void *i2d, const EVP_MD *type, char *data, + unsigned char *md, unsigned int *len); +OSSL_DEPRECATEDIN_3_0 +int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, char *data, EVP_PKEY *pkey, + const EVP_MD *type); +#endif +int ASN1_item_digest(const ASN1_ITEM *it, const EVP_MD *type, void *data, + unsigned char *md, unsigned int *len); +int ASN1_item_verify(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + EVP_PKEY *pkey); +int ASN1_item_verify_ctx(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + EVP_MD_CTX *ctx); +int ASN1_item_sign(const ASN1_ITEM *it, X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, const void *data, + EVP_PKEY *pkey, const EVP_MD *md); +int ASN1_item_sign_ctx(const ASN1_ITEM *it, X509_ALGOR *algor1, + X509_ALGOR *algor2, ASN1_BIT_STRING *signature, + const void *data, EVP_MD_CTX *ctx); + +#define X509_VERSION_1 0 +#define X509_VERSION_2 1 +#define X509_VERSION_3 2 + +long X509_get_version(const X509 *x); +int X509_set_version(X509 *x, long version); +int X509_set_serialNumber(X509 *x, ASN1_INTEGER *serial); +ASN1_INTEGER *X509_get_serialNumber(X509 *x); +const ASN1_INTEGER *X509_get0_serialNumber(const X509 *x); +int 
X509_set_issuer_name(X509 *x, const X509_NAME *name); +X509_NAME *X509_get_issuer_name(const X509 *a); +int X509_set_subject_name(X509 *x, const X509_NAME *name); +X509_NAME *X509_get_subject_name(const X509 *a); +const ASN1_TIME * X509_get0_notBefore(const X509 *x); +ASN1_TIME *X509_getm_notBefore(const X509 *x); +int X509_set1_notBefore(X509 *x, const ASN1_TIME *tm); +const ASN1_TIME *X509_get0_notAfter(const X509 *x); +ASN1_TIME *X509_getm_notAfter(const X509 *x); +int X509_set1_notAfter(X509 *x, const ASN1_TIME *tm); +int X509_set_pubkey(X509 *x, EVP_PKEY *pkey); +int X509_up_ref(X509 *x); +int X509_get_signature_type(const X509 *x); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_get_notBefore X509_getm_notBefore +# define X509_get_notAfter X509_getm_notAfter +# define X509_set_notBefore X509_set1_notBefore +# define X509_set_notAfter X509_set1_notAfter +#endif + + +/* + * This one is only used so that a binary form can output, as in + * i2d_X509_PUBKEY(X509_get_X509_PUBKEY(x), &buf) + */ +X509_PUBKEY *X509_get_X509_PUBKEY(const X509 *x); +const STACK_OF(X509_EXTENSION) *X509_get0_extensions(const X509 *x); +void X509_get0_uids(const X509 *x, const ASN1_BIT_STRING **piuid, + const ASN1_BIT_STRING **psuid); +const X509_ALGOR *X509_get0_tbs_sigalg(const X509 *x); + +EVP_PKEY *X509_get0_pubkey(const X509 *x); +EVP_PKEY *X509_get_pubkey(X509 *x); +ASN1_BIT_STRING *X509_get0_pubkey_bitstr(const X509 *x); + +#define X509_REQ_VERSION_1 0 + +long X509_REQ_get_version(const X509_REQ *req); +int X509_REQ_set_version(X509_REQ *x, long version); +X509_NAME *X509_REQ_get_subject_name(const X509_REQ *req); +int X509_REQ_set_subject_name(X509_REQ *req, const X509_NAME *name); +void X509_REQ_get0_signature(const X509_REQ *req, const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg); +void X509_REQ_set0_signature(X509_REQ *req, ASN1_BIT_STRING *psig); +int X509_REQ_set1_signature_algo(X509_REQ *req, X509_ALGOR *palg); +int X509_REQ_get_signature_nid(const X509_REQ *req); +int i2d_re_X509_REQ_tbs(X509_REQ *req, unsigned char **pp); +int X509_REQ_set_pubkey(X509_REQ *x, EVP_PKEY *pkey); +EVP_PKEY *X509_REQ_get_pubkey(X509_REQ *req); +EVP_PKEY *X509_REQ_get0_pubkey(const X509_REQ *req); +X509_PUBKEY *X509_REQ_get_X509_PUBKEY(X509_REQ *req); +int X509_REQ_extension_nid(int nid); +int *X509_REQ_get_extension_nids(void); +void X509_REQ_set_extension_nids(int *nids); +STACK_OF(X509_EXTENSION) *X509_REQ_get_extensions(X509_REQ *req); +int X509_REQ_add_extensions_nid(X509_REQ *req, + const STACK_OF(X509_EXTENSION) *exts, int nid); +int X509_REQ_add_extensions(X509_REQ *req, const STACK_OF(X509_EXTENSION) *ext); +int X509_REQ_get_attr_count(const X509_REQ *req); +int X509_REQ_get_attr_by_NID(const X509_REQ *req, int nid, int lastpos); +int X509_REQ_get_attr_by_OBJ(const X509_REQ *req, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *X509_REQ_get_attr(const X509_REQ *req, int loc); +X509_ATTRIBUTE *X509_REQ_delete_attr(X509_REQ *req, int loc); +int X509_REQ_add1_attr(X509_REQ *req, X509_ATTRIBUTE *attr); +int X509_REQ_add1_attr_by_OBJ(X509_REQ *req, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len); +int X509_REQ_add1_attr_by_NID(X509_REQ *req, + int nid, int type, + const unsigned char *bytes, int len); +int X509_REQ_add1_attr_by_txt(X509_REQ *req, + const char *attrname, int type, + const unsigned char *bytes, int len); + +#define X509_CRL_VERSION_1 0 +#define X509_CRL_VERSION_2 1 + +int X509_CRL_set_version(X509_CRL *x, long version); +int 
X509_CRL_set_issuer_name(X509_CRL *x, const X509_NAME *name); +int X509_CRL_set1_lastUpdate(X509_CRL *x, const ASN1_TIME *tm); +int X509_CRL_set1_nextUpdate(X509_CRL *x, const ASN1_TIME *tm); +int X509_CRL_sort(X509_CRL *crl); +int X509_CRL_up_ref(X509_CRL *crl); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_CRL_set_lastUpdate X509_CRL_set1_lastUpdate +# define X509_CRL_set_nextUpdate X509_CRL_set1_nextUpdate +#endif + +long X509_CRL_get_version(const X509_CRL *crl); +const ASN1_TIME *X509_CRL_get0_lastUpdate(const X509_CRL *crl); +const ASN1_TIME *X509_CRL_get0_nextUpdate(const X509_CRL *crl); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 ASN1_TIME *X509_CRL_get_lastUpdate(X509_CRL *crl); +OSSL_DEPRECATEDIN_1_1_0 ASN1_TIME *X509_CRL_get_nextUpdate(X509_CRL *crl); +#endif +X509_NAME *X509_CRL_get_issuer(const X509_CRL *crl); +const STACK_OF(X509_EXTENSION) *X509_CRL_get0_extensions(const X509_CRL *crl); +STACK_OF(X509_REVOKED) *X509_CRL_get_REVOKED(X509_CRL *crl); +void X509_CRL_get0_signature(const X509_CRL *crl, const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg); +int X509_CRL_get_signature_nid(const X509_CRL *crl); +int i2d_re_X509_CRL_tbs(X509_CRL *req, unsigned char **pp); + +const ASN1_INTEGER *X509_REVOKED_get0_serialNumber(const X509_REVOKED *x); +int X509_REVOKED_set_serialNumber(X509_REVOKED *x, ASN1_INTEGER *serial); +const ASN1_TIME *X509_REVOKED_get0_revocationDate(const X509_REVOKED *x); +int X509_REVOKED_set_revocationDate(X509_REVOKED *r, ASN1_TIME *tm); +const STACK_OF(X509_EXTENSION) * +X509_REVOKED_get0_extensions(const X509_REVOKED *r); + +X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer, + EVP_PKEY *skey, const EVP_MD *md, unsigned int flags); + +int X509_REQ_check_private_key(const X509_REQ *req, EVP_PKEY *pkey); + +int X509_check_private_key(const X509 *cert, const EVP_PKEY *pkey); +int X509_chain_check_suiteb(int *perror_depth, + X509 *x, STACK_OF(X509) *chain, + unsigned long flags); +int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags); +void OSSL_STACK_OF_X509_free(STACK_OF(X509) *certs); +STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain); + +int X509_issuer_and_serial_cmp(const X509 *a, const X509 *b); +unsigned long X509_issuer_and_serial_hash(X509 *a); + +int X509_issuer_name_cmp(const X509 *a, const X509 *b); +unsigned long X509_issuer_name_hash(X509 *a); + +int X509_subject_name_cmp(const X509 *a, const X509 *b); +unsigned long X509_subject_name_hash(X509 *x); + +# ifndef OPENSSL_NO_MD5 +unsigned long X509_issuer_name_hash_old(X509 *a); +unsigned long X509_subject_name_hash_old(X509 *x); +# endif + +# define X509_ADD_FLAG_DEFAULT 0 +# define X509_ADD_FLAG_UP_REF 0x1 +# define X509_ADD_FLAG_PREPEND 0x2 +# define X509_ADD_FLAG_NO_DUP 0x4 +# define X509_ADD_FLAG_NO_SS 0x8 +int X509_add_cert(STACK_OF(X509) *sk, X509 *cert, int flags); +int X509_add_certs(STACK_OF(X509) *sk, STACK_OF(X509) *certs, int flags); + +int X509_cmp(const X509 *a, const X509 *b); +int X509_NAME_cmp(const X509_NAME *a, const X509_NAME *b); +#ifndef OPENSSL_NO_DEPRECATED_3_0 +# define X509_NAME_hash(x) X509_NAME_hash_ex(x, NULL, NULL, NULL) +OSSL_DEPRECATEDIN_3_0 int X509_certificate_type(const X509 *x, + const EVP_PKEY *pubkey); +#endif +unsigned long X509_NAME_hash_ex(const X509_NAME *x, OSSL_LIB_CTX *libctx, + const char *propq, int *ok); +unsigned long X509_NAME_hash_old(const X509_NAME *x); + +int X509_CRL_cmp(const X509_CRL *a, const X509_CRL *b); +int X509_CRL_match(const X509_CRL *a, const X509_CRL *b); +int 
X509_aux_print(BIO *out, X509 *x, int indent); +# ifndef OPENSSL_NO_STDIO +int X509_print_ex_fp(FILE *bp, X509 *x, unsigned long nmflag, + unsigned long cflag); +int X509_print_fp(FILE *bp, X509 *x); +int X509_CRL_print_fp(FILE *bp, X509_CRL *x); +int X509_REQ_print_fp(FILE *bp, X509_REQ *req); +int X509_NAME_print_ex_fp(FILE *fp, const X509_NAME *nm, int indent, + unsigned long flags); +# endif + +int X509_NAME_print(BIO *bp, const X509_NAME *name, int obase); +int X509_NAME_print_ex(BIO *out, const X509_NAME *nm, int indent, + unsigned long flags); +int X509_print_ex(BIO *bp, X509 *x, unsigned long nmflag, + unsigned long cflag); +int X509_print(BIO *bp, X509 *x); +int X509_ocspid_print(BIO *bp, X509 *x); +int X509_CRL_print_ex(BIO *out, X509_CRL *x, unsigned long nmflag); +int X509_CRL_print(BIO *bp, X509_CRL *x); +int X509_REQ_print_ex(BIO *bp, X509_REQ *x, unsigned long nmflag, + unsigned long cflag); +int X509_REQ_print(BIO *bp, X509_REQ *req); + +int X509_NAME_entry_count(const X509_NAME *name); +int X509_NAME_get_text_by_NID(const X509_NAME *name, int nid, + char *buf, int len); +int X509_NAME_get_text_by_OBJ(const X509_NAME *name, const ASN1_OBJECT *obj, + char *buf, int len); + +/* + * NOTE: you should be passing -1, not 0 as lastpos. The functions that use + * lastpos, search after that position on. + */ +int X509_NAME_get_index_by_NID(const X509_NAME *name, int nid, int lastpos); +int X509_NAME_get_index_by_OBJ(const X509_NAME *name, const ASN1_OBJECT *obj, + int lastpos); +X509_NAME_ENTRY *X509_NAME_get_entry(const X509_NAME *name, int loc); +X509_NAME_ENTRY *X509_NAME_delete_entry(X509_NAME *name, int loc); +int X509_NAME_add_entry(X509_NAME *name, const X509_NAME_ENTRY *ne, + int loc, int set); +int X509_NAME_add_entry_by_OBJ(X509_NAME *name, const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len, int loc, + int set); +int X509_NAME_add_entry_by_NID(X509_NAME *name, int nid, int type, + const unsigned char *bytes, int len, int loc, + int set); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_txt(X509_NAME_ENTRY **ne, + const char *field, int type, + const unsigned char *bytes, + int len); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_NID(X509_NAME_ENTRY **ne, int nid, + int type, + const unsigned char *bytes, + int len); +int X509_NAME_add_entry_by_txt(X509_NAME *name, const char *field, int type, + const unsigned char *bytes, int len, int loc, + int set); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_OBJ(X509_NAME_ENTRY **ne, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, + int len); +int X509_NAME_ENTRY_set_object(X509_NAME_ENTRY *ne, const ASN1_OBJECT *obj); +int X509_NAME_ENTRY_set_data(X509_NAME_ENTRY *ne, int type, + const unsigned char *bytes, int len); +ASN1_OBJECT *X509_NAME_ENTRY_get_object(const X509_NAME_ENTRY *ne); +ASN1_STRING * X509_NAME_ENTRY_get_data(const X509_NAME_ENTRY *ne); +int X509_NAME_ENTRY_set(const X509_NAME_ENTRY *ne); + +int X509_NAME_get0_der(const X509_NAME *nm, const unsigned char **pder, + size_t *pderlen); + +int X509v3_get_ext_count(const STACK_OF(X509_EXTENSION) *x); +int X509v3_get_ext_by_NID(const STACK_OF(X509_EXTENSION) *x, + int nid, int lastpos); +int X509v3_get_ext_by_OBJ(const STACK_OF(X509_EXTENSION) *x, + const ASN1_OBJECT *obj, int lastpos); +int X509v3_get_ext_by_critical(const STACK_OF(X509_EXTENSION) *x, + int crit, int lastpos); +X509_EXTENSION *X509v3_get_ext(const STACK_OF(X509_EXTENSION) *x, int loc); +X509_EXTENSION *X509v3_delete_ext(STACK_OF(X509_EXTENSION) *x, int loc); 
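/*
 * Illustrative sketch (editor's example, not part of the generated
 * header): walking every commonName entry of an X509_NAME using the
 * lastpos protocol from the NOTE above -- start at -1 and feed each
 * return value back in as the new lastpos until the lookup fails.
 */
#include <stdio.h>
#include <openssl/objects.h>
#include <openssl/x509.h>

static void print_common_names(const X509_NAME *name)
{
    int pos = -1;

    while ((pos = X509_NAME_get_index_by_NID(name, NID_commonName, pos)) >= 0) {
        X509_NAME_ENTRY *e = X509_NAME_get_entry(name, pos);
        ASN1_STRING *data = X509_NAME_ENTRY_get_data(e);

        /* The raw bytes may not be UTF-8; real code should convert with
         * ASN1_STRING_to_UTF8() before displaying. */
        printf("CN=%.*s\n", ASN1_STRING_length(data),
               (const char *)ASN1_STRING_get0_data(data));
    }
}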
+STACK_OF(X509_EXTENSION) *X509v3_add_ext(STACK_OF(X509_EXTENSION) **x, + X509_EXTENSION *ex, int loc); + +int X509_get_ext_count(const X509 *x); +int X509_get_ext_by_NID(const X509 *x, int nid, int lastpos); +int X509_get_ext_by_OBJ(const X509 *x, const ASN1_OBJECT *obj, int lastpos); +int X509_get_ext_by_critical(const X509 *x, int crit, int lastpos); +X509_EXTENSION *X509_get_ext(const X509 *x, int loc); +X509_EXTENSION *X509_delete_ext(X509 *x, int loc); +int X509_add_ext(X509 *x, X509_EXTENSION *ex, int loc); +void *X509_get_ext_d2i(const X509 *x, int nid, int *crit, int *idx); +int X509_add1_ext_i2d(X509 *x, int nid, void *value, int crit, + unsigned long flags); + +int X509_CRL_get_ext_count(const X509_CRL *x); +int X509_CRL_get_ext_by_NID(const X509_CRL *x, int nid, int lastpos); +int X509_CRL_get_ext_by_OBJ(const X509_CRL *x, const ASN1_OBJECT *obj, + int lastpos); +int X509_CRL_get_ext_by_critical(const X509_CRL *x, int crit, int lastpos); +X509_EXTENSION *X509_CRL_get_ext(const X509_CRL *x, int loc); +X509_EXTENSION *X509_CRL_delete_ext(X509_CRL *x, int loc); +int X509_CRL_add_ext(X509_CRL *x, X509_EXTENSION *ex, int loc); +void *X509_CRL_get_ext_d2i(const X509_CRL *x, int nid, int *crit, int *idx); +int X509_CRL_add1_ext_i2d(X509_CRL *x, int nid, void *value, int crit, + unsigned long flags); + +int X509_REVOKED_get_ext_count(const X509_REVOKED *x); +int X509_REVOKED_get_ext_by_NID(const X509_REVOKED *x, int nid, int lastpos); +int X509_REVOKED_get_ext_by_OBJ(const X509_REVOKED *x, const ASN1_OBJECT *obj, + int lastpos); +int X509_REVOKED_get_ext_by_critical(const X509_REVOKED *x, int crit, + int lastpos); +X509_EXTENSION *X509_REVOKED_get_ext(const X509_REVOKED *x, int loc); +X509_EXTENSION *X509_REVOKED_delete_ext(X509_REVOKED *x, int loc); +int X509_REVOKED_add_ext(X509_REVOKED *x, X509_EXTENSION *ex, int loc); +void *X509_REVOKED_get_ext_d2i(const X509_REVOKED *x, int nid, int *crit, + int *idx); +int X509_REVOKED_add1_ext_i2d(X509_REVOKED *x, int nid, void *value, int crit, + unsigned long flags); + +X509_EXTENSION *X509_EXTENSION_create_by_NID(X509_EXTENSION **ex, + int nid, int crit, + ASN1_OCTET_STRING *data); +X509_EXTENSION *X509_EXTENSION_create_by_OBJ(X509_EXTENSION **ex, + const ASN1_OBJECT *obj, int crit, + ASN1_OCTET_STRING *data); +int X509_EXTENSION_set_object(X509_EXTENSION *ex, const ASN1_OBJECT *obj); +int X509_EXTENSION_set_critical(X509_EXTENSION *ex, int crit); +int X509_EXTENSION_set_data(X509_EXTENSION *ex, ASN1_OCTET_STRING *data); +ASN1_OBJECT *X509_EXTENSION_get_object(X509_EXTENSION *ex); +ASN1_OCTET_STRING *X509_EXTENSION_get_data(X509_EXTENSION *ne); +int X509_EXTENSION_get_critical(const X509_EXTENSION *ex); + +int X509at_get_attr_count(const STACK_OF(X509_ATTRIBUTE) *x); +int X509at_get_attr_by_NID(const STACK_OF(X509_ATTRIBUTE) *x, int nid, + int lastpos); +int X509at_get_attr_by_OBJ(const STACK_OF(X509_ATTRIBUTE) *sk, + const ASN1_OBJECT *obj, int lastpos); +X509_ATTRIBUTE *X509at_get_attr(const STACK_OF(X509_ATTRIBUTE) *x, int loc); +X509_ATTRIBUTE *X509at_delete_attr(STACK_OF(X509_ATTRIBUTE) *x, int loc); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr(STACK_OF(X509_ATTRIBUTE) **x, + X509_ATTRIBUTE *attr); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_OBJ(STACK_OF(X509_ATTRIBUTE) + **x, const ASN1_OBJECT *obj, + int type, + const unsigned char *bytes, + int len); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_NID(STACK_OF(X509_ATTRIBUTE) + **x, int nid, int type, + const unsigned char *bytes, + int len); +STACK_OF(X509_ATTRIBUTE) 
*X509at_add1_attr_by_txt(STACK_OF(X509_ATTRIBUTE) + **x, const char *attrname, + int type, + const unsigned char *bytes, + int len); +void *X509at_get0_data_by_OBJ(const STACK_OF(X509_ATTRIBUTE) *x, + const ASN1_OBJECT *obj, int lastpos, int type); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_NID(X509_ATTRIBUTE **attr, int nid, + int atrtype, const void *data, + int len); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_OBJ(X509_ATTRIBUTE **attr, + const ASN1_OBJECT *obj, + int atrtype, const void *data, + int len); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_txt(X509_ATTRIBUTE **attr, + const char *atrname, int type, + const unsigned char *bytes, + int len); +int X509_ATTRIBUTE_set1_object(X509_ATTRIBUTE *attr, const ASN1_OBJECT *obj); +int X509_ATTRIBUTE_set1_data(X509_ATTRIBUTE *attr, int attrtype, + const void *data, int len); +void *X509_ATTRIBUTE_get0_data(X509_ATTRIBUTE *attr, int idx, int atrtype, + void *data); +int X509_ATTRIBUTE_count(const X509_ATTRIBUTE *attr); +ASN1_OBJECT *X509_ATTRIBUTE_get0_object(X509_ATTRIBUTE *attr); +ASN1_TYPE *X509_ATTRIBUTE_get0_type(X509_ATTRIBUTE *attr, int idx); + +int EVP_PKEY_get_attr_count(const EVP_PKEY *key); +int EVP_PKEY_get_attr_by_NID(const EVP_PKEY *key, int nid, int lastpos); +int EVP_PKEY_get_attr_by_OBJ(const EVP_PKEY *key, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *EVP_PKEY_get_attr(const EVP_PKEY *key, int loc); +X509_ATTRIBUTE *EVP_PKEY_delete_attr(EVP_PKEY *key, int loc); +int EVP_PKEY_add1_attr(EVP_PKEY *key, X509_ATTRIBUTE *attr); +int EVP_PKEY_add1_attr_by_OBJ(EVP_PKEY *key, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len); +int EVP_PKEY_add1_attr_by_NID(EVP_PKEY *key, + int nid, int type, + const unsigned char *bytes, int len); +int EVP_PKEY_add1_attr_by_txt(EVP_PKEY *key, + const char *attrname, int type, + const unsigned char *bytes, int len); + +/* lookup a cert from a X509 STACK */ +X509 *X509_find_by_issuer_and_serial(STACK_OF(X509) *sk, const X509_NAME *name, + const ASN1_INTEGER *serial); +X509 *X509_find_by_subject(STACK_OF(X509) *sk, const X509_NAME *name); + +DECLARE_ASN1_FUNCTIONS(PBEPARAM) +DECLARE_ASN1_FUNCTIONS(PBE2PARAM) +DECLARE_ASN1_FUNCTIONS(PBKDF2PARAM) +#ifndef OPENSSL_NO_SCRYPT +DECLARE_ASN1_FUNCTIONS(SCRYPT_PARAMS) +#endif + +int PKCS5_pbe_set0_algor(X509_ALGOR *algor, int alg, int iter, + const unsigned char *salt, int saltlen); +int PKCS5_pbe_set0_algor_ex(X509_ALGOR *algor, int alg, int iter, + const unsigned char *salt, int saltlen, + OSSL_LIB_CTX *libctx); + +X509_ALGOR *PKCS5_pbe_set(int alg, int iter, + const unsigned char *salt, int saltlen); +X509_ALGOR *PKCS5_pbe_set_ex(int alg, int iter, + const unsigned char *salt, int saltlen, + OSSL_LIB_CTX *libctx); + +X509_ALGOR *PKCS5_pbe2_set(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen); +X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen, + unsigned char *aiv, int prf_nid); +X509_ALGOR *PKCS5_pbe2_set_iv_ex(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen, + unsigned char *aiv, int prf_nid, + OSSL_LIB_CTX *libctx); + +#ifndef OPENSSL_NO_SCRYPT +X509_ALGOR *PKCS5_pbe2_set_scrypt(const EVP_CIPHER *cipher, + const unsigned char *salt, int saltlen, + unsigned char *aiv, uint64_t N, uint64_t r, + uint64_t p); +#endif + +X509_ALGOR *PKCS5_pbkdf2_set(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen); +X509_ALGOR *PKCS5_pbkdf2_set_ex(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen, + 
OSSL_LIB_CTX *libctx); + +/* PKCS#8 utilities */ + +DECLARE_ASN1_FUNCTIONS(PKCS8_PRIV_KEY_INFO) + +EVP_PKEY *EVP_PKCS82PKEY(const PKCS8_PRIV_KEY_INFO *p8); +EVP_PKEY *EVP_PKCS82PKEY_ex(const PKCS8_PRIV_KEY_INFO *p8, OSSL_LIB_CTX *libctx, + const char *propq); +PKCS8_PRIV_KEY_INFO *EVP_PKEY2PKCS8(const EVP_PKEY *pkey); + +int PKCS8_pkey_set0(PKCS8_PRIV_KEY_INFO *priv, ASN1_OBJECT *aobj, + int version, int ptype, void *pval, + unsigned char *penc, int penclen); +int PKCS8_pkey_get0(const ASN1_OBJECT **ppkalg, + const unsigned char **pk, int *ppklen, + const X509_ALGOR **pa, const PKCS8_PRIV_KEY_INFO *p8); + +const STACK_OF(X509_ATTRIBUTE) * +PKCS8_pkey_get0_attrs(const PKCS8_PRIV_KEY_INFO *p8); +int PKCS8_pkey_add1_attr(PKCS8_PRIV_KEY_INFO *p8, X509_ATTRIBUTE *attr); +int PKCS8_pkey_add1_attr_by_NID(PKCS8_PRIV_KEY_INFO *p8, int nid, int type, + const unsigned char *bytes, int len); +int PKCS8_pkey_add1_attr_by_OBJ(PKCS8_PRIV_KEY_INFO *p8, const ASN1_OBJECT *obj, + int type, const unsigned char *bytes, int len); + + +void X509_PUBKEY_set0_public_key(X509_PUBKEY *pub, + unsigned char *penc, int penclen); +int X509_PUBKEY_set0_param(X509_PUBKEY *pub, ASN1_OBJECT *aobj, + int ptype, void *pval, + unsigned char *penc, int penclen); +int X509_PUBKEY_get0_param(ASN1_OBJECT **ppkalg, + const unsigned char **pk, int *ppklen, + X509_ALGOR **pa, const X509_PUBKEY *pub); +int X509_PUBKEY_eq(const X509_PUBKEY *a, const X509_PUBKEY *b); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509_vfy.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509_vfy.h new file mode 100644 index 00000000000..d3dfff8c0d9 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509_vfy.h @@ -0,0 +1,901 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509_vfy.h.in + * + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509_VFY_H +# define OPENSSL_X509_VFY_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509_VFY_H +# endif + +/* + * Protect against recursion, x509.h and x509_vfy.h each include the other. + */ +# ifndef OPENSSL_X509_H +# include +# endif + +# include +# include +# include +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +/*- +SSL_CTX -> X509_STORE + -> X509_LOOKUP + ->X509_LOOKUP_METHOD + -> X509_LOOKUP + ->X509_LOOKUP_METHOD + +SSL -> X509_STORE_CTX + ->X509_STORE + +The X509_STORE holds the tables etc for verification stuff. +A X509_STORE_CTX is used while validating a single certificate. +The X509_STORE has X509_LOOKUPs for looking up certs. +The X509_STORE then calls a function to actually verify the +certificate chain. 
+*/ + +typedef enum { + X509_LU_NONE = 0, + X509_LU_X509, X509_LU_CRL +} X509_LOOKUP_TYPE; + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +#define X509_LU_RETRY -1 +#define X509_LU_FAIL 0 +#endif + +SKM_DEFINE_STACK_OF_INTERNAL(X509_LOOKUP, X509_LOOKUP, X509_LOOKUP) +#define sk_X509_LOOKUP_num(sk) OPENSSL_sk_num(ossl_check_const_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_value(sk, idx) ((X509_LOOKUP *)OPENSSL_sk_value(ossl_check_const_X509_LOOKUP_sk_type(sk), (idx))) +#define sk_X509_LOOKUP_new(cmp) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new(ossl_check_X509_LOOKUP_compfunc_type(cmp))) +#define sk_X509_LOOKUP_new_null() ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new_null()) +#define sk_X509_LOOKUP_new_reserve(cmp, n) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new_reserve(ossl_check_X509_LOOKUP_compfunc_type(cmp), (n))) +#define sk_X509_LOOKUP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_LOOKUP_sk_type(sk), (n)) +#define sk_X509_LOOKUP_free(sk) OPENSSL_sk_free(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_zero(sk) OPENSSL_sk_zero(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_delete(sk, i) ((X509_LOOKUP *)OPENSSL_sk_delete(ossl_check_X509_LOOKUP_sk_type(sk), (i))) +#define sk_X509_LOOKUP_delete_ptr(sk, ptr) ((X509_LOOKUP *)OPENSSL_sk_delete_ptr(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr))) +#define sk_X509_LOOKUP_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_pop(sk) ((X509_LOOKUP *)OPENSSL_sk_pop(ossl_check_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_shift(sk) ((X509_LOOKUP *)OPENSSL_sk_shift(ossl_check_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_LOOKUP_sk_type(sk),ossl_check_X509_LOOKUP_freefunc_type(freefunc)) +#define sk_X509_LOOKUP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr), (idx)) +#define sk_X509_LOOKUP_set(sk, idx, ptr) ((X509_LOOKUP *)OPENSSL_sk_set(ossl_check_X509_LOOKUP_sk_type(sk), (idx), ossl_check_X509_LOOKUP_type(ptr))) +#define sk_X509_LOOKUP_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr), pnum) +#define sk_X509_LOOKUP_sort(sk) OPENSSL_sk_sort(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_dup(sk) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_dup(ossl_check_const_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_copyfunc_type(copyfunc), ossl_check_X509_LOOKUP_freefunc_type(freefunc))) +#define sk_X509_LOOKUP_set_cmp_func(sk, cmp) ((sk_X509_LOOKUP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_OBJECT, X509_OBJECT, X509_OBJECT) +#define sk_X509_OBJECT_num(sk) OPENSSL_sk_num(ossl_check_const_X509_OBJECT_sk_type(sk)) +#define 
sk_X509_OBJECT_value(sk, idx) ((X509_OBJECT *)OPENSSL_sk_value(ossl_check_const_X509_OBJECT_sk_type(sk), (idx))) +#define sk_X509_OBJECT_new(cmp) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new(ossl_check_X509_OBJECT_compfunc_type(cmp))) +#define sk_X509_OBJECT_new_null() ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new_null()) +#define sk_X509_OBJECT_new_reserve(cmp, n) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new_reserve(ossl_check_X509_OBJECT_compfunc_type(cmp), (n))) +#define sk_X509_OBJECT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_OBJECT_sk_type(sk), (n)) +#define sk_X509_OBJECT_free(sk) OPENSSL_sk_free(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_zero(sk) OPENSSL_sk_zero(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_delete(sk, i) ((X509_OBJECT *)OPENSSL_sk_delete(ossl_check_X509_OBJECT_sk_type(sk), (i))) +#define sk_X509_OBJECT_delete_ptr(sk, ptr) ((X509_OBJECT *)OPENSSL_sk_delete_ptr(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr))) +#define sk_X509_OBJECT_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_pop(sk) ((X509_OBJECT *)OPENSSL_sk_pop(ossl_check_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_shift(sk) ((X509_OBJECT *)OPENSSL_sk_shift(ossl_check_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_OBJECT_sk_type(sk),ossl_check_X509_OBJECT_freefunc_type(freefunc)) +#define sk_X509_OBJECT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr), (idx)) +#define sk_X509_OBJECT_set(sk, idx, ptr) ((X509_OBJECT *)OPENSSL_sk_set(ossl_check_X509_OBJECT_sk_type(sk), (idx), ossl_check_X509_OBJECT_type(ptr))) +#define sk_X509_OBJECT_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr), pnum) +#define sk_X509_OBJECT_sort(sk) OPENSSL_sk_sort(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_dup(sk) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_dup(ossl_check_const_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_copyfunc_type(copyfunc), ossl_check_X509_OBJECT_freefunc_type(freefunc))) +#define sk_X509_OBJECT_set_cmp_func(sk, cmp) ((sk_X509_OBJECT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_VERIFY_PARAM, X509_VERIFY_PARAM, X509_VERIFY_PARAM) +#define sk_X509_VERIFY_PARAM_num(sk) OPENSSL_sk_num(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_value(sk, idx) ((X509_VERIFY_PARAM *)OPENSSL_sk_value(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk), (idx))) +#define sk_X509_VERIFY_PARAM_new(cmp) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_new(ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp))) +#define sk_X509_VERIFY_PARAM_new_null() ((STACK_OF(X509_VERIFY_PARAM) 
*)OPENSSL_sk_new_null()) +#define sk_X509_VERIFY_PARAM_new_reserve(cmp, n) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_new_reserve(ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp), (n))) +#define sk_X509_VERIFY_PARAM_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (n)) +#define sk_X509_VERIFY_PARAM_free(sk) OPENSSL_sk_free(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_zero(sk) OPENSSL_sk_zero(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_delete(sk, i) ((X509_VERIFY_PARAM *)OPENSSL_sk_delete(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (i))) +#define sk_X509_VERIFY_PARAM_delete_ptr(sk, ptr) ((X509_VERIFY_PARAM *)OPENSSL_sk_delete_ptr(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr))) +#define sk_X509_VERIFY_PARAM_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_pop(sk) ((X509_VERIFY_PARAM *)OPENSSL_sk_pop(ossl_check_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_shift(sk) ((X509_VERIFY_PARAM *)OPENSSL_sk_shift(ossl_check_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_VERIFY_PARAM_sk_type(sk),ossl_check_X509_VERIFY_PARAM_freefunc_type(freefunc)) +#define sk_X509_VERIFY_PARAM_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr), (idx)) +#define sk_X509_VERIFY_PARAM_set(sk, idx, ptr) ((X509_VERIFY_PARAM *)OPENSSL_sk_set(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (idx), ossl_check_X509_VERIFY_PARAM_type(ptr))) +#define sk_X509_VERIFY_PARAM_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr), pnum) +#define sk_X509_VERIFY_PARAM_sort(sk) OPENSSL_sk_sort(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_dup(sk) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_dup(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_copyfunc_type(copyfunc), ossl_check_X509_VERIFY_PARAM_freefunc_type(freefunc))) +#define sk_X509_VERIFY_PARAM_set_cmp_func(sk, cmp) ((sk_X509_VERIFY_PARAM_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp))) + + +/* This is used for a table of trust checking functions */ +typedef struct x509_trust_st { + int trust; + int flags; + int (*check_trust) (struct x509_trust_st *, X509 *, int); + char *name; + int arg1; + void *arg2; +} X509_TRUST; +SKM_DEFINE_STACK_OF_INTERNAL(X509_TRUST, X509_TRUST, X509_TRUST) +#define sk_X509_TRUST_num(sk) OPENSSL_sk_num(ossl_check_const_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_value(sk, idx) ((X509_TRUST 
*)OPENSSL_sk_value(ossl_check_const_X509_TRUST_sk_type(sk), (idx))) +#define sk_X509_TRUST_new(cmp) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new(ossl_check_X509_TRUST_compfunc_type(cmp))) +#define sk_X509_TRUST_new_null() ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new_null()) +#define sk_X509_TRUST_new_reserve(cmp, n) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new_reserve(ossl_check_X509_TRUST_compfunc_type(cmp), (n))) +#define sk_X509_TRUST_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_TRUST_sk_type(sk), (n)) +#define sk_X509_TRUST_free(sk) OPENSSL_sk_free(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_zero(sk) OPENSSL_sk_zero(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_delete(sk, i) ((X509_TRUST *)OPENSSL_sk_delete(ossl_check_X509_TRUST_sk_type(sk), (i))) +#define sk_X509_TRUST_delete_ptr(sk, ptr) ((X509_TRUST *)OPENSSL_sk_delete_ptr(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr))) +#define sk_X509_TRUST_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_pop(sk) ((X509_TRUST *)OPENSSL_sk_pop(ossl_check_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_shift(sk) ((X509_TRUST *)OPENSSL_sk_shift(ossl_check_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_TRUST_sk_type(sk),ossl_check_X509_TRUST_freefunc_type(freefunc)) +#define sk_X509_TRUST_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr), (idx)) +#define sk_X509_TRUST_set(sk, idx, ptr) ((X509_TRUST *)OPENSSL_sk_set(ossl_check_X509_TRUST_sk_type(sk), (idx), ossl_check_X509_TRUST_type(ptr))) +#define sk_X509_TRUST_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr), pnum) +#define sk_X509_TRUST_sort(sk) OPENSSL_sk_sort(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_dup(sk) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_dup(ossl_check_const_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_copyfunc_type(copyfunc), ossl_check_X509_TRUST_freefunc_type(freefunc))) +#define sk_X509_TRUST_set_cmp_func(sk, cmp) ((sk_X509_TRUST_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_compfunc_type(cmp))) + + +/* standard trust ids */ +# define X509_TRUST_DEFAULT 0 /* Only valid in purpose settings */ +# define X509_TRUST_COMPAT 1 +# define X509_TRUST_SSL_CLIENT 2 +# define X509_TRUST_SSL_SERVER 3 +# define X509_TRUST_EMAIL 4 +# define X509_TRUST_OBJECT_SIGN 5 +# define X509_TRUST_OCSP_SIGN 6 +# define X509_TRUST_OCSP_REQUEST 7 +# define X509_TRUST_TSA 8 +/* Keep these up to date! 
*/ +# define X509_TRUST_MIN 1 +# define X509_TRUST_MAX 8 + +/* trust_flags values */ +# define X509_TRUST_DYNAMIC (1U << 0) +# define X509_TRUST_DYNAMIC_NAME (1U << 1) +/* No compat trust if self-signed, preempts "DO_SS" */ +# define X509_TRUST_NO_SS_COMPAT (1U << 2) +/* Compat trust if no explicit accepted trust EKUs */ +# define X509_TRUST_DO_SS_COMPAT (1U << 3) +/* Accept "anyEKU" as a wildcard rejection OID and as a wildcard trust OID */ +# define X509_TRUST_OK_ANY_EKU (1U << 4) + +/* check_trust return codes */ +# define X509_TRUST_TRUSTED 1 +# define X509_TRUST_REJECTED 2 +# define X509_TRUST_UNTRUSTED 3 + +int X509_TRUST_set(int *t, int trust); +int X509_TRUST_get_count(void); +X509_TRUST *X509_TRUST_get0(int idx); +int X509_TRUST_get_by_id(int id); +int X509_TRUST_add(int id, int flags, int (*ck) (X509_TRUST *, X509 *, int), + const char *name, int arg1, void *arg2); +void X509_TRUST_cleanup(void); +int X509_TRUST_get_flags(const X509_TRUST *xp); +char *X509_TRUST_get0_name(const X509_TRUST *xp); +int X509_TRUST_get_trust(const X509_TRUST *xp); + +int X509_trusted(const X509 *x); +int X509_add1_trust_object(X509 *x, const ASN1_OBJECT *obj); +int X509_add1_reject_object(X509 *x, const ASN1_OBJECT *obj); +void X509_trust_clear(X509 *x); +void X509_reject_clear(X509 *x); +STACK_OF(ASN1_OBJECT) *X509_get0_trust_objects(X509 *x); +STACK_OF(ASN1_OBJECT) *X509_get0_reject_objects(X509 *x); + +int (*X509_TRUST_set_default(int (*trust) (int, X509 *, int))) (int, X509 *, + int); +int X509_check_trust(X509 *x, int id, int flags); + +int X509_verify_cert(X509_STORE_CTX *ctx); +int X509_STORE_CTX_verify(X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_build_chain(X509 *target, STACK_OF(X509) *certs, + X509_STORE *store, int with_self_signed, + OSSL_LIB_CTX *libctx, const char *propq); + +int X509_STORE_set_depth(X509_STORE *store, int depth); + +typedef int (*X509_STORE_CTX_verify_cb)(int, X509_STORE_CTX *); +int X509_STORE_CTX_print_verify_cb(int ok, X509_STORE_CTX *ctx); +typedef int (*X509_STORE_CTX_verify_fn)(X509_STORE_CTX *); +typedef int (*X509_STORE_CTX_get_issuer_fn)(X509 **issuer, + X509_STORE_CTX *ctx, X509 *x); +typedef int (*X509_STORE_CTX_check_issued_fn)(X509_STORE_CTX *ctx, + X509 *x, X509 *issuer); +typedef int (*X509_STORE_CTX_check_revocation_fn)(X509_STORE_CTX *ctx); +typedef int (*X509_STORE_CTX_get_crl_fn)(X509_STORE_CTX *ctx, + X509_CRL **crl, X509 *x); +typedef int (*X509_STORE_CTX_check_crl_fn)(X509_STORE_CTX *ctx, X509_CRL *crl); +typedef int (*X509_STORE_CTX_cert_crl_fn)(X509_STORE_CTX *ctx, + X509_CRL *crl, X509 *x); +typedef int (*X509_STORE_CTX_check_policy_fn)(X509_STORE_CTX *ctx); +typedef STACK_OF(X509) + *(*X509_STORE_CTX_lookup_certs_fn)(X509_STORE_CTX *ctx, + const X509_NAME *nm); +typedef STACK_OF(X509_CRL) + *(*X509_STORE_CTX_lookup_crls_fn)(const X509_STORE_CTX *ctx, + const X509_NAME *nm); +typedef int (*X509_STORE_CTX_cleanup_fn)(X509_STORE_CTX *ctx); + +void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); + +# define X509_STORE_CTX_set_app_data(ctx,data) \ + X509_STORE_CTX_set_ex_data(ctx,0,data) +# define X509_STORE_CTX_get_app_data(ctx) \ + X509_STORE_CTX_get_ex_data(ctx,0) + +# define X509_L_FILE_LOAD 1 +# define X509_L_ADD_DIR 2 +# define X509_L_ADD_STORE 3 +# define X509_L_LOAD_STORE 4 + +# define X509_LOOKUP_load_file(x,name,type) \ + X509_LOOKUP_ctrl((x),X509_L_FILE_LOAD,(name),(long)(type),NULL) + +# define X509_LOOKUP_add_dir(x,name,type) \ + X509_LOOKUP_ctrl((x),X509_L_ADD_DIR,(name),(long)(type),NULL) + +# define 
X509_LOOKUP_add_store(x,name) \ + X509_LOOKUP_ctrl((x),X509_L_ADD_STORE,(name),0,NULL) + +# define X509_LOOKUP_load_store(x,name) \ + X509_LOOKUP_ctrl((x),X509_L_LOAD_STORE,(name),0,NULL) + +# define X509_LOOKUP_load_file_ex(x, name, type, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_FILE_LOAD, (name), (long)(type), NULL,\ + (libctx), (propq)) + +# define X509_LOOKUP_load_store_ex(x, name, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_LOAD_STORE, (name), 0, NULL, \ + (libctx), (propq)) + +# define X509_LOOKUP_add_store_ex(x, name, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_ADD_STORE, (name), 0, NULL, \ + (libctx), (propq)) + +# define X509_V_OK 0 +# define X509_V_ERR_UNSPECIFIED 1 +# define X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT 2 +# define X509_V_ERR_UNABLE_TO_GET_CRL 3 +# define X509_V_ERR_UNABLE_TO_DECRYPT_CERT_SIGNATURE 4 +# define X509_V_ERR_UNABLE_TO_DECRYPT_CRL_SIGNATURE 5 +# define X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY 6 +# define X509_V_ERR_CERT_SIGNATURE_FAILURE 7 +# define X509_V_ERR_CRL_SIGNATURE_FAILURE 8 +# define X509_V_ERR_CERT_NOT_YET_VALID 9 +# define X509_V_ERR_CERT_HAS_EXPIRED 10 +# define X509_V_ERR_CRL_NOT_YET_VALID 11 +# define X509_V_ERR_CRL_HAS_EXPIRED 12 +# define X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD 13 +# define X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD 14 +# define X509_V_ERR_ERROR_IN_CRL_LAST_UPDATE_FIELD 15 +# define X509_V_ERR_ERROR_IN_CRL_NEXT_UPDATE_FIELD 16 +# define X509_V_ERR_OUT_OF_MEM 17 +# define X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT 18 +# define X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN 19 +# define X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY 20 +# define X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE 21 +# define X509_V_ERR_CERT_CHAIN_TOO_LONG 22 +# define X509_V_ERR_CERT_REVOKED 23 +# define X509_V_ERR_NO_ISSUER_PUBLIC_KEY 24 +# define X509_V_ERR_PATH_LENGTH_EXCEEDED 25 +# define X509_V_ERR_INVALID_PURPOSE 26 +# define X509_V_ERR_CERT_UNTRUSTED 27 +# define X509_V_ERR_CERT_REJECTED 28 + +/* These are 'informational' when looking for issuer cert */ +# define X509_V_ERR_SUBJECT_ISSUER_MISMATCH 29 +# define X509_V_ERR_AKID_SKID_MISMATCH 30 +# define X509_V_ERR_AKID_ISSUER_SERIAL_MISMATCH 31 +# define X509_V_ERR_KEYUSAGE_NO_CERTSIGN 32 +# define X509_V_ERR_UNABLE_TO_GET_CRL_ISSUER 33 +# define X509_V_ERR_UNHANDLED_CRITICAL_EXTENSION 34 +# define X509_V_ERR_KEYUSAGE_NO_CRL_SIGN 35 +# define X509_V_ERR_UNHANDLED_CRITICAL_CRL_EXTENSION 36 +# define X509_V_ERR_INVALID_NON_CA 37 +# define X509_V_ERR_PROXY_PATH_LENGTH_EXCEEDED 38 +# define X509_V_ERR_KEYUSAGE_NO_DIGITAL_SIGNATURE 39 +# define X509_V_ERR_PROXY_CERTIFICATES_NOT_ALLOWED 40 +# define X509_V_ERR_INVALID_EXTENSION 41 +# define X509_V_ERR_INVALID_POLICY_EXTENSION 42 +# define X509_V_ERR_NO_EXPLICIT_POLICY 43 +# define X509_V_ERR_DIFFERENT_CRL_SCOPE 44 +# define X509_V_ERR_UNSUPPORTED_EXTENSION_FEATURE 45 +# define X509_V_ERR_UNNESTED_RESOURCE 46 +# define X509_V_ERR_PERMITTED_VIOLATION 47 +# define X509_V_ERR_EXCLUDED_VIOLATION 48 +# define X509_V_ERR_SUBTREE_MINMAX 49 +/* The application is not happy */ +# define X509_V_ERR_APPLICATION_VERIFICATION 50 +# define X509_V_ERR_UNSUPPORTED_CONSTRAINT_TYPE 51 +# define X509_V_ERR_UNSUPPORTED_CONSTRAINT_SYNTAX 52 +# define X509_V_ERR_UNSUPPORTED_NAME_SYNTAX 53 +# define X509_V_ERR_CRL_PATH_VALIDATION_ERROR 54 +/* Another issuer check debug option */ +# define X509_V_ERR_PATH_LOOP 55 +/* Suite B mode algorithm violation */ +# define X509_V_ERR_SUITE_B_INVALID_VERSION 56 +# define X509_V_ERR_SUITE_B_INVALID_ALGORITHM 57 +# define 
X509_V_ERR_SUITE_B_INVALID_CURVE 58 +# define X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM 59 +# define X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED 60 +# define X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256 61 +/* Host, email and IP check errors */ +# define X509_V_ERR_HOSTNAME_MISMATCH 62 +# define X509_V_ERR_EMAIL_MISMATCH 63 +# define X509_V_ERR_IP_ADDRESS_MISMATCH 64 +/* DANE TLSA errors */ +# define X509_V_ERR_DANE_NO_MATCH 65 +/* security level errors */ +# define X509_V_ERR_EE_KEY_TOO_SMALL 66 +# define X509_V_ERR_CA_KEY_TOO_SMALL 67 +# define X509_V_ERR_CA_MD_TOO_WEAK 68 +/* Caller error */ +# define X509_V_ERR_INVALID_CALL 69 +/* Issuer lookup error */ +# define X509_V_ERR_STORE_LOOKUP 70 +/* Certificate transparency */ +# define X509_V_ERR_NO_VALID_SCTS 71 + +# define X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION 72 +/* OCSP status errors */ +# define X509_V_ERR_OCSP_VERIFY_NEEDED 73 /* Need OCSP verification */ +# define X509_V_ERR_OCSP_VERIFY_FAILED 74 /* Couldn't verify cert through OCSP */ +# define X509_V_ERR_OCSP_CERT_UNKNOWN 75 /* Certificate wasn't recognized by the OCSP responder */ + +# define X509_V_ERR_UNSUPPORTED_SIGNATURE_ALGORITHM 76 +# define X509_V_ERR_SIGNATURE_ALGORITHM_MISMATCH 77 + +/* Errors in case a check in X509_V_FLAG_X509_STRICT mode fails */ +# define X509_V_ERR_SIGNATURE_ALGORITHM_INCONSISTENCY 78 +# define X509_V_ERR_INVALID_CA 79 +# define X509_V_ERR_PATHLEN_INVALID_FOR_NON_CA 80 +# define X509_V_ERR_PATHLEN_WITHOUT_KU_KEY_CERT_SIGN 81 +# define X509_V_ERR_KU_KEY_CERT_SIGN_INVALID_FOR_NON_CA 82 +# define X509_V_ERR_ISSUER_NAME_EMPTY 83 +# define X509_V_ERR_SUBJECT_NAME_EMPTY 84 +# define X509_V_ERR_MISSING_AUTHORITY_KEY_IDENTIFIER 85 +# define X509_V_ERR_MISSING_SUBJECT_KEY_IDENTIFIER 86 +# define X509_V_ERR_EMPTY_SUBJECT_ALT_NAME 87 +# define X509_V_ERR_EMPTY_SUBJECT_SAN_NOT_CRITICAL 88 +# define X509_V_ERR_CA_BCONS_NOT_CRITICAL 89 +# define X509_V_ERR_AUTHORITY_KEY_IDENTIFIER_CRITICAL 90 +# define X509_V_ERR_SUBJECT_KEY_IDENTIFIER_CRITICAL 91 +# define X509_V_ERR_CA_CERT_MISSING_KEY_USAGE 92 +# define X509_V_ERR_EXTENSIONS_REQUIRE_VERSION_3 93 +# define X509_V_ERR_EC_KEY_EXPLICIT_PARAMS 94 +# define X509_V_ERR_RPK_UNTRUSTED 95 + +/* Certificate verify flags */ +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_V_FLAG_CB_ISSUER_CHECK 0x0 /* Deprecated */ +# endif +/* Use check time instead of current time */ +# define X509_V_FLAG_USE_CHECK_TIME 0x2 +/* Lookup CRLs */ +# define X509_V_FLAG_CRL_CHECK 0x4 +/* Lookup CRLs for whole chain */ +# define X509_V_FLAG_CRL_CHECK_ALL 0x8 +/* Ignore unhandled critical extensions */ +# define X509_V_FLAG_IGNORE_CRITICAL 0x10 +/* Disable workarounds for broken certificates */ +# define X509_V_FLAG_X509_STRICT 0x20 +/* Enable proxy certificate validation */ +# define X509_V_FLAG_ALLOW_PROXY_CERTS 0x40 +/* Enable policy checking */ +# define X509_V_FLAG_POLICY_CHECK 0x80 +/* Policy variable require-explicit-policy */ +# define X509_V_FLAG_EXPLICIT_POLICY 0x100 +/* Policy variable inhibit-any-policy */ +# define X509_V_FLAG_INHIBIT_ANY 0x200 +/* Policy variable inhibit-policy-mapping */ +# define X509_V_FLAG_INHIBIT_MAP 0x400 +/* Notify callback that policy is OK */ +# define X509_V_FLAG_NOTIFY_POLICY 0x800 +/* Extended CRL features such as indirect CRLs, alternate CRL signing keys */ +# define X509_V_FLAG_EXTENDED_CRL_SUPPORT 0x1000 +/* Delta CRL support */ +# define X509_V_FLAG_USE_DELTAS 0x2000 +/* Check self-signed CA signature */ +# define X509_V_FLAG_CHECK_SS_SIGNATURE 0x4000 +/* Use trusted store first */ +# define 
X509_V_FLAG_TRUSTED_FIRST 0x8000 +/* Suite B 128 bit only mode: not normally used */ +# define X509_V_FLAG_SUITEB_128_LOS_ONLY 0x10000 +/* Suite B 192 bit only mode */ +# define X509_V_FLAG_SUITEB_192_LOS 0x20000 +/* Suite B 128 bit mode allowing 192 bit algorithms */ +# define X509_V_FLAG_SUITEB_128_LOS 0x30000 +/* Allow partial chains if at least one certificate is in trusted store */ +# define X509_V_FLAG_PARTIAL_CHAIN 0x80000 +/* + * If the initial chain is not trusted, do not attempt to build an alternative + * chain. Alternate chain checking was introduced in 1.1.0. Setting this flag + * will force the behaviour to match that of previous versions. + */ +# define X509_V_FLAG_NO_ALT_CHAINS 0x100000 +/* Do not check certificate/CRL validity against current time */ +# define X509_V_FLAG_NO_CHECK_TIME 0x200000 + +# define X509_VP_FLAG_DEFAULT 0x1 +# define X509_VP_FLAG_OVERWRITE 0x2 +# define X509_VP_FLAG_RESET_FLAGS 0x4 +# define X509_VP_FLAG_LOCKED 0x8 +# define X509_VP_FLAG_ONCE 0x10 + +/* Internal use: mask of policy related options */ +# define X509_V_FLAG_POLICY_MASK (X509_V_FLAG_POLICY_CHECK \ + | X509_V_FLAG_EXPLICIT_POLICY \ + | X509_V_FLAG_INHIBIT_ANY \ + | X509_V_FLAG_INHIBIT_MAP) + +int X509_OBJECT_idx_by_subject(STACK_OF(X509_OBJECT) *h, X509_LOOKUP_TYPE type, + const X509_NAME *name); +X509_OBJECT *X509_OBJECT_retrieve_by_subject(STACK_OF(X509_OBJECT) *h, + X509_LOOKUP_TYPE type, + const X509_NAME *name); +X509_OBJECT *X509_OBJECT_retrieve_match(STACK_OF(X509_OBJECT) *h, + X509_OBJECT *x); +int X509_OBJECT_up_ref_count(X509_OBJECT *a); +X509_OBJECT *X509_OBJECT_new(void); +void X509_OBJECT_free(X509_OBJECT *a); +X509_LOOKUP_TYPE X509_OBJECT_get_type(const X509_OBJECT *a); +X509 *X509_OBJECT_get0_X509(const X509_OBJECT *a); +int X509_OBJECT_set1_X509(X509_OBJECT *a, X509 *obj); +X509_CRL *X509_OBJECT_get0_X509_CRL(const X509_OBJECT *a); +int X509_OBJECT_set1_X509_CRL(X509_OBJECT *a, X509_CRL *obj); +X509_STORE *X509_STORE_new(void); +void X509_STORE_free(X509_STORE *xs); +int X509_STORE_lock(X509_STORE *xs); +int X509_STORE_unlock(X509_STORE *xs); +int X509_STORE_up_ref(X509_STORE *xs); +STACK_OF(X509_OBJECT) *X509_STORE_get0_objects(const X509_STORE *xs); +STACK_OF(X509) *X509_STORE_get1_all_certs(X509_STORE *xs); +STACK_OF(X509) *X509_STORE_CTX_get1_certs(X509_STORE_CTX *xs, + const X509_NAME *nm); +STACK_OF(X509_CRL) *X509_STORE_CTX_get1_crls(const X509_STORE_CTX *st, + const X509_NAME *nm); +int X509_STORE_set_flags(X509_STORE *xs, unsigned long flags); +int X509_STORE_set_purpose(X509_STORE *xs, int purpose); +int X509_STORE_set_trust(X509_STORE *xs, int trust); +int X509_STORE_set1_param(X509_STORE *xs, const X509_VERIFY_PARAM *pm); +X509_VERIFY_PARAM *X509_STORE_get0_param(const X509_STORE *xs); + +void X509_STORE_set_verify(X509_STORE *xs, X509_STORE_CTX_verify_fn verify); +#define X509_STORE_set_verify_func(ctx, func) \ + X509_STORE_set_verify((ctx),(func)) +void X509_STORE_CTX_set_verify(X509_STORE_CTX *ctx, + X509_STORE_CTX_verify_fn verify); +X509_STORE_CTX_verify_fn X509_STORE_get_verify(const X509_STORE *xs); +void X509_STORE_set_verify_cb(X509_STORE *xs, + X509_STORE_CTX_verify_cb verify_cb); +# define X509_STORE_set_verify_cb_func(ctx,func) \ + X509_STORE_set_verify_cb((ctx),(func)) +X509_STORE_CTX_verify_cb X509_STORE_get_verify_cb(const X509_STORE *xs); +void X509_STORE_set_get_issuer(X509_STORE *xs, + X509_STORE_CTX_get_issuer_fn get_issuer); +X509_STORE_CTX_get_issuer_fn X509_STORE_get_get_issuer(const X509_STORE *xs); +void 
X509_STORE_set_check_issued(X509_STORE *xs, + X509_STORE_CTX_check_issued_fn check_issued); +X509_STORE_CTX_check_issued_fn X509_STORE_get_check_issued(const X509_STORE *s); +void X509_STORE_set_check_revocation(X509_STORE *xs, + X509_STORE_CTX_check_revocation_fn check_revocation); +X509_STORE_CTX_check_revocation_fn + X509_STORE_get_check_revocation(const X509_STORE *xs); +void X509_STORE_set_get_crl(X509_STORE *xs, + X509_STORE_CTX_get_crl_fn get_crl); +X509_STORE_CTX_get_crl_fn X509_STORE_get_get_crl(const X509_STORE *xs); +void X509_STORE_set_check_crl(X509_STORE *xs, + X509_STORE_CTX_check_crl_fn check_crl); +X509_STORE_CTX_check_crl_fn X509_STORE_get_check_crl(const X509_STORE *xs); +void X509_STORE_set_cert_crl(X509_STORE *xs, + X509_STORE_CTX_cert_crl_fn cert_crl); +X509_STORE_CTX_cert_crl_fn X509_STORE_get_cert_crl(const X509_STORE *xs); +void X509_STORE_set_check_policy(X509_STORE *xs, + X509_STORE_CTX_check_policy_fn check_policy); +X509_STORE_CTX_check_policy_fn X509_STORE_get_check_policy(const X509_STORE *s); +void X509_STORE_set_lookup_certs(X509_STORE *xs, + X509_STORE_CTX_lookup_certs_fn lookup_certs); +X509_STORE_CTX_lookup_certs_fn X509_STORE_get_lookup_certs(const X509_STORE *s); +void X509_STORE_set_lookup_crls(X509_STORE *xs, + X509_STORE_CTX_lookup_crls_fn lookup_crls); +#define X509_STORE_set_lookup_crls_cb(ctx, func) \ + X509_STORE_set_lookup_crls((ctx), (func)) +X509_STORE_CTX_lookup_crls_fn X509_STORE_get_lookup_crls(const X509_STORE *xs); +void X509_STORE_set_cleanup(X509_STORE *xs, + X509_STORE_CTX_cleanup_fn cleanup); +X509_STORE_CTX_cleanup_fn X509_STORE_get_cleanup(const X509_STORE *xs); + +#define X509_STORE_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509_STORE, l, p, newf, dupf, freef) +int X509_STORE_set_ex_data(X509_STORE *xs, int idx, void *data); +void *X509_STORE_get_ex_data(const X509_STORE *xs, int idx); + +X509_STORE_CTX *X509_STORE_CTX_new_ex(OSSL_LIB_CTX *libctx, const char *propq); +X509_STORE_CTX *X509_STORE_CTX_new(void); + +int X509_STORE_CTX_get1_issuer(X509 **issuer, X509_STORE_CTX *ctx, X509 *x); + +void X509_STORE_CTX_free(X509_STORE_CTX *ctx); +int X509_STORE_CTX_init(X509_STORE_CTX *ctx, X509_STORE *trust_store, + X509 *target, STACK_OF(X509) *untrusted); +int X509_STORE_CTX_init_rpk(X509_STORE_CTX *ctx, X509_STORE *trust_store, + EVP_PKEY* rpk); +void X509_STORE_CTX_set0_trusted_stack(X509_STORE_CTX *ctx, STACK_OF(X509) *sk); +void X509_STORE_CTX_cleanup(X509_STORE_CTX *ctx); + +X509_STORE *X509_STORE_CTX_get0_store(const X509_STORE_CTX *ctx); +X509 *X509_STORE_CTX_get0_cert(const X509_STORE_CTX *ctx); +EVP_PKEY *X509_STORE_CTX_get0_rpk(const X509_STORE_CTX *ctx); +STACK_OF(X509)* X509_STORE_CTX_get0_untrusted(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set0_untrusted(X509_STORE_CTX *ctx, STACK_OF(X509) *sk); +void X509_STORE_CTX_set_verify_cb(X509_STORE_CTX *ctx, + X509_STORE_CTX_verify_cb verify); +X509_STORE_CTX_verify_cb X509_STORE_CTX_get_verify_cb(const X509_STORE_CTX *ctx); +X509_STORE_CTX_verify_fn X509_STORE_CTX_get_verify(const X509_STORE_CTX *ctx); +X509_STORE_CTX_get_issuer_fn X509_STORE_CTX_get_get_issuer(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_issued_fn X509_STORE_CTX_get_check_issued(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_revocation_fn X509_STORE_CTX_get_check_revocation(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_get_crl(X509_STORE_CTX *ctx, + X509_STORE_CTX_get_crl_fn get_crl); +X509_STORE_CTX_get_crl_fn X509_STORE_CTX_get_get_crl(const 
X509_STORE_CTX *ctx); +X509_STORE_CTX_check_crl_fn X509_STORE_CTX_get_check_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX_cert_crl_fn X509_STORE_CTX_get_cert_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_policy_fn X509_STORE_CTX_get_check_policy(const X509_STORE_CTX *ctx); +X509_STORE_CTX_lookup_certs_fn X509_STORE_CTX_get_lookup_certs(const X509_STORE_CTX *ctx); +X509_STORE_CTX_lookup_crls_fn X509_STORE_CTX_get_lookup_crls(const X509_STORE_CTX *ctx); +X509_STORE_CTX_cleanup_fn X509_STORE_CTX_get_cleanup(const X509_STORE_CTX *ctx); + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_STORE_CTX_get_chain X509_STORE_CTX_get0_chain +# define X509_STORE_CTX_set_chain X509_STORE_CTX_set0_untrusted +# define X509_STORE_CTX_trusted_stack X509_STORE_CTX_set0_trusted_stack +# define X509_STORE_get_by_subject X509_STORE_CTX_get_by_subject +# define X509_STORE_get1_certs X509_STORE_CTX_get1_certs +# define X509_STORE_get1_crls X509_STORE_CTX_get1_crls +/* the following macro is misspelled; use X509_STORE_get1_certs instead */ +# define X509_STORE_get1_cert X509_STORE_CTX_get1_certs +/* the following macro is misspelled; use X509_STORE_get1_crls instead */ +# define X509_STORE_get1_crl X509_STORE_CTX_get1_crls +#endif + +X509_LOOKUP *X509_STORE_add_lookup(X509_STORE *xs, X509_LOOKUP_METHOD *m); +X509_LOOKUP_METHOD *X509_LOOKUP_hash_dir(void); +X509_LOOKUP_METHOD *X509_LOOKUP_file(void); +X509_LOOKUP_METHOD *X509_LOOKUP_store(void); + +typedef int (*X509_LOOKUP_ctrl_fn)(X509_LOOKUP *ctx, int cmd, const char *argc, + long argl, char **ret); +typedef int (*X509_LOOKUP_ctrl_ex_fn)( + X509_LOOKUP *ctx, int cmd, const char *argc, long argl, char **ret, + OSSL_LIB_CTX *libctx, const char *propq); + +typedef int (*X509_LOOKUP_get_by_subject_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_subject_ex_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + X509_OBJECT *ret, + OSSL_LIB_CTX *libctx, + const char *propq); +typedef int (*X509_LOOKUP_get_by_issuer_serial_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + const ASN1_INTEGER *serial, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_fingerprint_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const unsigned char* bytes, + int len, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_alias_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const char *str, + int len, + X509_OBJECT *ret); + +X509_LOOKUP_METHOD *X509_LOOKUP_meth_new(const char *name); +void X509_LOOKUP_meth_free(X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_new_item(X509_LOOKUP_METHOD *method, + int (*new_item) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_new_item(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_free(X509_LOOKUP_METHOD *method, + void (*free_fn) (X509_LOOKUP *ctx)); +void (*X509_LOOKUP_meth_get_free(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_init(X509_LOOKUP_METHOD *method, + int (*init) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_init(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_shutdown(X509_LOOKUP_METHOD *method, + int (*shutdown) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_shutdown(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_ctrl(X509_LOOKUP_METHOD *method, + X509_LOOKUP_ctrl_fn ctrl_fn); +X509_LOOKUP_ctrl_fn X509_LOOKUP_meth_get_ctrl(const 
X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_subject(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_subject_fn fn); +X509_LOOKUP_get_by_subject_fn X509_LOOKUP_meth_get_get_by_subject( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_issuer_serial(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_issuer_serial_fn fn); +X509_LOOKUP_get_by_issuer_serial_fn X509_LOOKUP_meth_get_get_by_issuer_serial( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_fingerprint(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_fingerprint_fn fn); +X509_LOOKUP_get_by_fingerprint_fn X509_LOOKUP_meth_get_get_by_fingerprint( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_alias(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_alias_fn fn); +X509_LOOKUP_get_by_alias_fn X509_LOOKUP_meth_get_get_by_alias( + const X509_LOOKUP_METHOD *method); + + +int X509_STORE_add_cert(X509_STORE *xs, X509 *x); +int X509_STORE_add_crl(X509_STORE *xs, X509_CRL *x); + +int X509_STORE_CTX_get_by_subject(const X509_STORE_CTX *vs, + X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret); +X509_OBJECT *X509_STORE_CTX_get_obj_by_subject(X509_STORE_CTX *vs, + X509_LOOKUP_TYPE type, + const X509_NAME *name); + +int X509_LOOKUP_ctrl(X509_LOOKUP *ctx, int cmd, const char *argc, + long argl, char **ret); +int X509_LOOKUP_ctrl_ex(X509_LOOKUP *ctx, int cmd, const char *argc, long argl, + char **ret, OSSL_LIB_CTX *libctx, const char *propq); + +int X509_load_cert_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_file_ex(X509_LOOKUP *ctx, const char *file, int type, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_load_crl_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_crl_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_crl_file_ex(X509_LOOKUP *ctx, const char *file, int type, + OSSL_LIB_CTX *libctx, const char *propq); + +X509_LOOKUP *X509_LOOKUP_new(X509_LOOKUP_METHOD *method); +void X509_LOOKUP_free(X509_LOOKUP *ctx); +int X509_LOOKUP_init(X509_LOOKUP *ctx); +int X509_LOOKUP_by_subject(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret); +int X509_LOOKUP_by_subject_ex(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_LOOKUP_by_issuer_serial(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, + const ASN1_INTEGER *serial, + X509_OBJECT *ret); +int X509_LOOKUP_by_fingerprint(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const unsigned char *bytes, int len, + X509_OBJECT *ret); +int X509_LOOKUP_by_alias(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const char *str, int len, X509_OBJECT *ret); +int X509_LOOKUP_set_method_data(X509_LOOKUP *ctx, void *data); +void *X509_LOOKUP_get_method_data(const X509_LOOKUP *ctx); +X509_STORE *X509_LOOKUP_get_store(const X509_LOOKUP *ctx); +int X509_LOOKUP_shutdown(X509_LOOKUP *ctx); + +int X509_STORE_load_file(X509_STORE *xs, const char *file); +int X509_STORE_load_path(X509_STORE *xs, const char *path); +int X509_STORE_load_store(X509_STORE *xs, const char *store); +int X509_STORE_load_locations(X509_STORE *s, const char *file, const char *dir); +int X509_STORE_set_default_paths(X509_STORE *xs); + +int X509_STORE_load_file_ex(X509_STORE *xs, const char *file, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_STORE_load_store_ex(X509_STORE *xs, const char *store, + OSSL_LIB_CTX *libctx, const char *propq); +int 
X509_STORE_load_locations_ex(X509_STORE *xs, + const char *file, const char *dir, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_STORE_set_default_paths_ex(X509_STORE *xs, + OSSL_LIB_CTX *libctx, const char *propq); + +#define X509_STORE_CTX_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509_STORE_CTX, l, p, newf, dupf, freef) +int X509_STORE_CTX_set_ex_data(X509_STORE_CTX *ctx, int idx, void *data); +void *X509_STORE_CTX_get_ex_data(const X509_STORE_CTX *ctx, int idx); +int X509_STORE_CTX_get_error(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_error(X509_STORE_CTX *ctx, int s); +int X509_STORE_CTX_get_error_depth(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_error_depth(X509_STORE_CTX *ctx, int depth); +X509 *X509_STORE_CTX_get_current_cert(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_current_cert(X509_STORE_CTX *ctx, X509 *x); +X509 *X509_STORE_CTX_get0_current_issuer(const X509_STORE_CTX *ctx); +X509_CRL *X509_STORE_CTX_get0_current_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX *X509_STORE_CTX_get0_parent_ctx(const X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_STORE_CTX_get0_chain(const X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_STORE_CTX_get1_chain(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_cert(X509_STORE_CTX *ctx, X509 *target); +void X509_STORE_CTX_set0_rpk(X509_STORE_CTX *ctx, EVP_PKEY *target); +void X509_STORE_CTX_set0_verified_chain(X509_STORE_CTX *c, STACK_OF(X509) *sk); +void X509_STORE_CTX_set0_crls(X509_STORE_CTX *ctx, STACK_OF(X509_CRL) *sk); +int X509_STORE_CTX_set_purpose(X509_STORE_CTX *ctx, int purpose); +int X509_STORE_CTX_set_trust(X509_STORE_CTX *ctx, int trust); +int X509_STORE_CTX_purpose_inherit(X509_STORE_CTX *ctx, int def_purpose, + int purpose, int trust); +void X509_STORE_CTX_set_flags(X509_STORE_CTX *ctx, unsigned long flags); +void X509_STORE_CTX_set_time(X509_STORE_CTX *ctx, unsigned long flags, + time_t t); +void X509_STORE_CTX_set_current_reasons(X509_STORE_CTX *ctx, + unsigned int current_reasons); + +X509_POLICY_TREE *X509_STORE_CTX_get0_policy_tree(const X509_STORE_CTX *ctx); +int X509_STORE_CTX_get_explicit_policy(const X509_STORE_CTX *ctx); +int X509_STORE_CTX_get_num_untrusted(const X509_STORE_CTX *ctx); + +X509_VERIFY_PARAM *X509_STORE_CTX_get0_param(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set0_param(X509_STORE_CTX *ctx, X509_VERIFY_PARAM *param); +int X509_STORE_CTX_set_default(X509_STORE_CTX *ctx, const char *name); + +/* + * Bridge opacity barrier between libcrypt and libssl, also needed to support + * offline testing in test/danetest.c + */ +void X509_STORE_CTX_set0_dane(X509_STORE_CTX *ctx, SSL_DANE *dane); +#define DANE_FLAG_NO_DANE_EE_NAMECHECKS (1L << 0) + +/* X509_VERIFY_PARAM functions */ + +X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void); +void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *to, + const X509_VERIFY_PARAM *from); +int X509_VERIFY_PARAM_set1(X509_VERIFY_PARAM *to, + const X509_VERIFY_PARAM *from); +int X509_VERIFY_PARAM_set1_name(X509_VERIFY_PARAM *param, const char *name); +int X509_VERIFY_PARAM_set_flags(X509_VERIFY_PARAM *param, + unsigned long flags); +int X509_VERIFY_PARAM_clear_flags(X509_VERIFY_PARAM *param, + unsigned long flags); +unsigned long X509_VERIFY_PARAM_get_flags(const X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set_purpose(X509_VERIFY_PARAM *param, int purpose); +int X509_VERIFY_PARAM_set_trust(X509_VERIFY_PARAM *param, int trust); +void 
X509_VERIFY_PARAM_set_depth(X509_VERIFY_PARAM *param, int depth); +void X509_VERIFY_PARAM_set_auth_level(X509_VERIFY_PARAM *param, int auth_level); +time_t X509_VERIFY_PARAM_get_time(const X509_VERIFY_PARAM *param); +void X509_VERIFY_PARAM_set_time(X509_VERIFY_PARAM *param, time_t t); +int X509_VERIFY_PARAM_add0_policy(X509_VERIFY_PARAM *param, + ASN1_OBJECT *policy); +int X509_VERIFY_PARAM_set1_policies(X509_VERIFY_PARAM *param, + STACK_OF(ASN1_OBJECT) *policies); + +int X509_VERIFY_PARAM_set_inh_flags(X509_VERIFY_PARAM *param, + uint32_t flags); +uint32_t X509_VERIFY_PARAM_get_inh_flags(const X509_VERIFY_PARAM *param); + +char *X509_VERIFY_PARAM_get0_host(X509_VERIFY_PARAM *param, int idx); +int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); +int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); +void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param, + unsigned int flags); +unsigned int X509_VERIFY_PARAM_get_hostflags(const X509_VERIFY_PARAM *param); +char *X509_VERIFY_PARAM_get0_peername(const X509_VERIFY_PARAM *param); +void X509_VERIFY_PARAM_move_peername(X509_VERIFY_PARAM *, X509_VERIFY_PARAM *); +char *X509_VERIFY_PARAM_get0_email(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param, + const char *email, size_t emaillen); +char *X509_VERIFY_PARAM_get1_ip_asc(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param, + const unsigned char *ip, size_t iplen); +int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, + const char *ipasc); + +int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_get_auth_level(const X509_VERIFY_PARAM *param); +const char *X509_VERIFY_PARAM_get0_name(const X509_VERIFY_PARAM *param); + +int X509_VERIFY_PARAM_add0_table(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_get_count(void); +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_get0(int id); +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_lookup(const char *name); +void X509_VERIFY_PARAM_table_cleanup(void); + +/* Non positive return values are errors */ +#define X509_PCY_TREE_FAILURE -2 /* Failure to satisfy explicit policy */ +#define X509_PCY_TREE_INVALID -1 /* Inconsistent or invalid extensions */ +#define X509_PCY_TREE_INTERNAL 0 /* Internal error, most likely malloc */ + +/* + * Positive return values form a bit mask, all but the first are internal to + * the library and don't appear in results from X509_policy_check(). 
+ */ +#define X509_PCY_TREE_VALID 1 /* The policy tree is valid */ +#define X509_PCY_TREE_EMPTY 2 /* The policy tree is empty */ +#define X509_PCY_TREE_EXPLICIT 4 /* Explicit policy required */ + +int X509_policy_check(X509_POLICY_TREE **ptree, int *pexplicit_policy, + STACK_OF(X509) *certs, + STACK_OF(ASN1_OBJECT) *policy_oids, unsigned int flags); + +void X509_policy_tree_free(X509_POLICY_TREE *tree); + +int X509_policy_tree_level_count(const X509_POLICY_TREE *tree); +X509_POLICY_LEVEL *X509_policy_tree_get0_level(const X509_POLICY_TREE *tree, + int i); + +STACK_OF(X509_POLICY_NODE) + *X509_policy_tree_get0_policies(const X509_POLICY_TREE *tree); + +STACK_OF(X509_POLICY_NODE) + *X509_policy_tree_get0_user_policies(const X509_POLICY_TREE *tree); + +int X509_policy_level_node_count(X509_POLICY_LEVEL *level); + +X509_POLICY_NODE *X509_policy_level_get0_node(const X509_POLICY_LEVEL *level, + int i); + +const ASN1_OBJECT *X509_policy_node_get0_policy(const X509_POLICY_NODE *node); + +STACK_OF(POLICYQUALINFO) + *X509_policy_node_get0_qualifiers(const X509_POLICY_NODE *node); +const X509_POLICY_NODE + *X509_policy_node_get0_parent(const X509_POLICY_NODE *node); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509v3.h new file mode 100644 index 00000000000..e64da7e0c5a --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/x509v3.h @@ -0,0 +1,1454 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509v3.h.in + * + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509V3_H +# define OPENSSL_X509V3_H +# pragma once + +# include <openssl/macros.h> +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509V3_H +# endif + +# include <openssl/bio.h> +# include <openssl/x509.h> +# include <openssl/conf.h> +# include <openssl/types.h> +# ifndef OPENSSL_NO_STDIO +# include <stdio.h> +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward reference */ +struct v3_ext_method; +struct v3_ext_ctx; + +/* Useful typedefs */ + +typedef void *(*X509V3_EXT_NEW)(void); +typedef void (*X509V3_EXT_FREE) (void *); +typedef void *(*X509V3_EXT_D2I)(void *, const unsigned char **, long); +typedef int (*X509V3_EXT_I2D) (const void *, unsigned char **); +typedef STACK_OF(CONF_VALUE) * + (*X509V3_EXT_I2V) (const struct v3_ext_method *method, void *ext, + STACK_OF(CONF_VALUE) *extlist); +typedef void *(*X509V3_EXT_V2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, + STACK_OF(CONF_VALUE) *values); +typedef char *(*X509V3_EXT_I2S)(const struct v3_ext_method *method, + void *ext); +typedef void *(*X509V3_EXT_S2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, const char *str); +typedef int (*X509V3_EXT_I2R) (const struct v3_ext_method *method, void *ext, + BIO *out, int indent); +typedef void *(*X509V3_EXT_R2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, const char *str); + +/* V3 extension structure */ + +struct v3_ext_method { + int ext_nid; + int ext_flags; +/* If this is set the following four fields are ignored */ + ASN1_ITEM_EXP *it; +/* Old style ASN1 calls */ + X509V3_EXT_NEW ext_new; + X509V3_EXT_FREE ext_free; + X509V3_EXT_D2I d2i; + X509V3_EXT_I2D i2d; +/* The following pair is used for string extensions */ + X509V3_EXT_I2S i2s; + X509V3_EXT_S2I s2i; +/* The following pair is used for multi-valued extensions */ + X509V3_EXT_I2V i2v; + X509V3_EXT_V2I v2i; +/* The following are used for raw extensions */ + X509V3_EXT_I2R i2r; + X509V3_EXT_R2I r2i; + void *usr_data; /* Any extension specific data */ +}; + +typedef struct X509V3_CONF_METHOD_st { + char *(*get_string) (void *db, const char *section, const char *value); + STACK_OF(CONF_VALUE) *(*get_section) (void *db, const char *section); + void (*free_string) (void *db, char *string); + void (*free_section) (void *db, STACK_OF(CONF_VALUE) *section); +} X509V3_CONF_METHOD; + +/* Context specific info for producing X509 v3 extensions*/ +struct v3_ext_ctx { +# define X509V3_CTX_TEST 0x1 +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define CTX_TEST X509V3_CTX_TEST +# endif +# define X509V3_CTX_REPLACE 0x2 + int flags; + X509 *issuer_cert; + X509 *subject_cert; + X509_REQ *subject_req; + X509_CRL *crl; + X509V3_CONF_METHOD *db_meth; + void *db; + EVP_PKEY *issuer_pkey; +/* Maybe more here */ +}; + +typedef struct v3_ext_method X509V3_EXT_METHOD; + +SKM_DEFINE_STACK_OF_INTERNAL(X509V3_EXT_METHOD, X509V3_EXT_METHOD, X509V3_EXT_METHOD) +#define sk_X509V3_EXT_METHOD_num(sk) OPENSSL_sk_num(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_value(sk, idx) ((X509V3_EXT_METHOD *)OPENSSL_sk_value(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk), (idx))) +#define sk_X509V3_EXT_METHOD_new(cmp) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_new(ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp))) +#define sk_X509V3_EXT_METHOD_new_null() ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_new_null()) +#define sk_X509V3_EXT_METHOD_new_reserve(cmp, n) ((STACK_OF(X509V3_EXT_METHOD)
*)OPENSSL_sk_new_reserve(ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp), (n))) +#define sk_X509V3_EXT_METHOD_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (n)) +#define sk_X509V3_EXT_METHOD_free(sk) OPENSSL_sk_free(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_zero(sk) OPENSSL_sk_zero(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_delete(sk, i) ((X509V3_EXT_METHOD *)OPENSSL_sk_delete(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (i))) +#define sk_X509V3_EXT_METHOD_delete_ptr(sk, ptr) ((X509V3_EXT_METHOD *)OPENSSL_sk_delete_ptr(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr))) +#define sk_X509V3_EXT_METHOD_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_pop(sk) ((X509V3_EXT_METHOD *)OPENSSL_sk_pop(ossl_check_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_shift(sk) ((X509V3_EXT_METHOD *)OPENSSL_sk_shift(ossl_check_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509V3_EXT_METHOD_sk_type(sk),ossl_check_X509V3_EXT_METHOD_freefunc_type(freefunc)) +#define sk_X509V3_EXT_METHOD_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr), (idx)) +#define sk_X509V3_EXT_METHOD_set(sk, idx, ptr) ((X509V3_EXT_METHOD *)OPENSSL_sk_set(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (idx), ossl_check_X509V3_EXT_METHOD_type(ptr))) +#define sk_X509V3_EXT_METHOD_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr), pnum) +#define sk_X509V3_EXT_METHOD_sort(sk) OPENSSL_sk_sort(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_dup(sk) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_dup(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_deep_copy(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_copyfunc_type(copyfunc), ossl_check_X509V3_EXT_METHOD_freefunc_type(freefunc))) +#define sk_X509V3_EXT_METHOD_set_cmp_func(sk, cmp) ((sk_X509V3_EXT_METHOD_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp))) + + +/* ext_flags values */ +# define X509V3_EXT_DYNAMIC 0x1 +# define X509V3_EXT_CTX_DEP 0x2 +# define X509V3_EXT_MULTILINE 0x4 + +typedef BIT_STRING_BITNAME ENUMERATED_NAMES; + +typedef struct BASIC_CONSTRAINTS_st { + int ca; + ASN1_INTEGER *pathlen; +} BASIC_CONSTRAINTS; + +typedef struct PKEY_USAGE_PERIOD_st { + ASN1_GENERALIZEDTIME *notBefore; + ASN1_GENERALIZEDTIME *notAfter; +} PKEY_USAGE_PERIOD; + +typedef struct otherName_st { + ASN1_OBJECT *type_id; + ASN1_TYPE *value; +} OTHERNAME; + +typedef struct EDIPartyName_st { + ASN1_STRING 
*nameAssigner; + ASN1_STRING *partyName; +} EDIPARTYNAME; + +typedef struct GENERAL_NAME_st { +# define GEN_OTHERNAME 0 +# define GEN_EMAIL 1 +# define GEN_DNS 2 +# define GEN_X400 3 +# define GEN_DIRNAME 4 +# define GEN_EDIPARTY 5 +# define GEN_URI 6 +# define GEN_IPADD 7 +# define GEN_RID 8 + int type; + union { + char *ptr; + OTHERNAME *otherName; /* otherName */ + ASN1_IA5STRING *rfc822Name; + ASN1_IA5STRING *dNSName; + ASN1_STRING *x400Address; + X509_NAME *directoryName; + EDIPARTYNAME *ediPartyName; + ASN1_IA5STRING *uniformResourceIdentifier; + ASN1_OCTET_STRING *iPAddress; + ASN1_OBJECT *registeredID; + /* Old names */ + ASN1_OCTET_STRING *ip; /* iPAddress */ + X509_NAME *dirn; /* dirn */ + ASN1_IA5STRING *ia5; /* rfc822Name, dNSName, + * uniformResourceIdentifier */ + ASN1_OBJECT *rid; /* registeredID */ + ASN1_TYPE *other; /* x400Address */ + } d; +} GENERAL_NAME; + +typedef struct ACCESS_DESCRIPTION_st { + ASN1_OBJECT *method; + GENERAL_NAME *location; +} ACCESS_DESCRIPTION; + +SKM_DEFINE_STACK_OF_INTERNAL(ACCESS_DESCRIPTION, ACCESS_DESCRIPTION, ACCESS_DESCRIPTION) +#define sk_ACCESS_DESCRIPTION_num(sk) OPENSSL_sk_num(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_value(sk, idx) ((ACCESS_DESCRIPTION *)OPENSSL_sk_value(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk), (idx))) +#define sk_ACCESS_DESCRIPTION_new(cmp) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new(ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp))) +#define sk_ACCESS_DESCRIPTION_new_null() ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new_null()) +#define sk_ACCESS_DESCRIPTION_new_reserve(cmp, n) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new_reserve(ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp), (n))) +#define sk_ACCESS_DESCRIPTION_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (n)) +#define sk_ACCESS_DESCRIPTION_free(sk) OPENSSL_sk_free(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_zero(sk) OPENSSL_sk_zero(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_delete(sk, i) ((ACCESS_DESCRIPTION *)OPENSSL_sk_delete(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (i))) +#define sk_ACCESS_DESCRIPTION_delete_ptr(sk, ptr) ((ACCESS_DESCRIPTION *)OPENSSL_sk_delete_ptr(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr))) +#define sk_ACCESS_DESCRIPTION_push(sk, ptr) OPENSSL_sk_push(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_pop(sk) ((ACCESS_DESCRIPTION *)OPENSSL_sk_pop(ossl_check_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_shift(sk) ((ACCESS_DESCRIPTION *)OPENSSL_sk_shift(ossl_check_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ACCESS_DESCRIPTION_sk_type(sk),ossl_check_ACCESS_DESCRIPTION_freefunc_type(freefunc)) +#define sk_ACCESS_DESCRIPTION_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr), (idx)) +#define sk_ACCESS_DESCRIPTION_set(sk, idx, ptr) ((ACCESS_DESCRIPTION *)OPENSSL_sk_set(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (idx), ossl_check_ACCESS_DESCRIPTION_type(ptr))) +#define sk_ACCESS_DESCRIPTION_find(sk, ptr) OPENSSL_sk_find(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), 
ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr), pnum) +#define sk_ACCESS_DESCRIPTION_sort(sk) OPENSSL_sk_sort(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_dup(sk) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_dup(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_deep_copy(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_copyfunc_type(copyfunc), ossl_check_ACCESS_DESCRIPTION_freefunc_type(freefunc))) +#define sk_ACCESS_DESCRIPTION_set_cmp_func(sk, cmp) ((sk_ACCESS_DESCRIPTION_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_NAME, GENERAL_NAME, GENERAL_NAME) +#define sk_GENERAL_NAME_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_value(sk, idx) ((GENERAL_NAME *)OPENSSL_sk_value(ossl_check_const_GENERAL_NAME_sk_type(sk), (idx))) +#define sk_GENERAL_NAME_new(cmp) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new(ossl_check_GENERAL_NAME_compfunc_type(cmp))) +#define sk_GENERAL_NAME_new_null() ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new_null()) +#define sk_GENERAL_NAME_new_reserve(cmp, n) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_NAME_compfunc_type(cmp), (n))) +#define sk_GENERAL_NAME_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_NAME_sk_type(sk), (n)) +#define sk_GENERAL_NAME_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_delete(sk, i) ((GENERAL_NAME *)OPENSSL_sk_delete(ossl_check_GENERAL_NAME_sk_type(sk), (i))) +#define sk_GENERAL_NAME_delete_ptr(sk, ptr) ((GENERAL_NAME *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr))) +#define sk_GENERAL_NAME_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_pop(sk) ((GENERAL_NAME *)OPENSSL_sk_pop(ossl_check_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_shift(sk) ((GENERAL_NAME *)OPENSSL_sk_shift(ossl_check_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_NAME_sk_type(sk),ossl_check_GENERAL_NAME_freefunc_type(freefunc)) +#define sk_GENERAL_NAME_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr), (idx)) +#define sk_GENERAL_NAME_set(sk, idx, ptr) ((GENERAL_NAME *)OPENSSL_sk_set(ossl_check_GENERAL_NAME_sk_type(sk), (idx), ossl_check_GENERAL_NAME_type(ptr))) +#define sk_GENERAL_NAME_find(sk, ptr) OPENSSL_sk_find(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_NAME_sk_type(sk), 
ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr), pnum) +#define sk_GENERAL_NAME_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_dup(sk) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_copyfunc_type(copyfunc), ossl_check_GENERAL_NAME_freefunc_type(freefunc))) +#define sk_GENERAL_NAME_set_cmp_func(sk, cmp) ((sk_GENERAL_NAME_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_compfunc_type(cmp))) + + +typedef STACK_OF(ACCESS_DESCRIPTION) AUTHORITY_INFO_ACCESS; +typedef STACK_OF(ASN1_OBJECT) EXTENDED_KEY_USAGE; +typedef STACK_OF(ASN1_INTEGER) TLS_FEATURE; +typedef STACK_OF(GENERAL_NAME) GENERAL_NAMES; + +SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_NAMES, GENERAL_NAMES, GENERAL_NAMES) +#define sk_GENERAL_NAMES_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_value(sk, idx) ((GENERAL_NAMES *)OPENSSL_sk_value(ossl_check_const_GENERAL_NAMES_sk_type(sk), (idx))) +#define sk_GENERAL_NAMES_new(cmp) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new(ossl_check_GENERAL_NAMES_compfunc_type(cmp))) +#define sk_GENERAL_NAMES_new_null() ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new_null()) +#define sk_GENERAL_NAMES_new_reserve(cmp, n) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_NAMES_compfunc_type(cmp), (n))) +#define sk_GENERAL_NAMES_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_NAMES_sk_type(sk), (n)) +#define sk_GENERAL_NAMES_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_delete(sk, i) ((GENERAL_NAMES *)OPENSSL_sk_delete(ossl_check_GENERAL_NAMES_sk_type(sk), (i))) +#define sk_GENERAL_NAMES_delete_ptr(sk, ptr) ((GENERAL_NAMES *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr))) +#define sk_GENERAL_NAMES_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_pop(sk) ((GENERAL_NAMES *)OPENSSL_sk_pop(ossl_check_GENERAL_NAMES_sk_type(sk))) +#define sk_GENERAL_NAMES_shift(sk) ((GENERAL_NAMES *)OPENSSL_sk_shift(ossl_check_GENERAL_NAMES_sk_type(sk))) +#define sk_GENERAL_NAMES_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_NAMES_sk_type(sk),ossl_check_GENERAL_NAMES_freefunc_type(freefunc)) +#define sk_GENERAL_NAMES_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr), (idx)) +#define sk_GENERAL_NAMES_set(sk, idx, ptr) ((GENERAL_NAMES *)OPENSSL_sk_set(ossl_check_GENERAL_NAMES_sk_type(sk), (idx), ossl_check_GENERAL_NAMES_type(ptr))) +#define sk_GENERAL_NAMES_find(sk, ptr) OPENSSL_sk_find(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_NAMES_sk_type(sk), 
ossl_check_GENERAL_NAMES_type(ptr))
+#define sk_GENERAL_NAMES_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr), pnum)
+#define sk_GENERAL_NAMES_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_NAMES_sk_type(sk))
+#define sk_GENERAL_NAMES_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_NAMES_sk_type(sk))
+#define sk_GENERAL_NAMES_dup(sk) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_NAMES_sk_type(sk)))
+#define sk_GENERAL_NAMES_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_copyfunc_type(copyfunc), ossl_check_GENERAL_NAMES_freefunc_type(freefunc)))
+#define sk_GENERAL_NAMES_set_cmp_func(sk, cmp) ((sk_GENERAL_NAMES_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_compfunc_type(cmp)))
+
+
+typedef struct DIST_POINT_NAME_st {
+    int type;
+    union {
+        GENERAL_NAMES *fullname;
+        STACK_OF(X509_NAME_ENTRY) *relativename;
+    } name;
+/* If relativename then this contains the full distribution point name */
+    X509_NAME *dpname;
+} DIST_POINT_NAME;
+/* All existing reasons */
+# define CRLDP_ALL_REASONS 0x807f
+
+# define CRL_REASON_NONE -1
+# define CRL_REASON_UNSPECIFIED 0
+# define CRL_REASON_KEY_COMPROMISE 1
+# define CRL_REASON_CA_COMPROMISE 2
+# define CRL_REASON_AFFILIATION_CHANGED 3
+# define CRL_REASON_SUPERSEDED 4
+# define CRL_REASON_CESSATION_OF_OPERATION 5
+# define CRL_REASON_CERTIFICATE_HOLD 6
+# define CRL_REASON_REMOVE_FROM_CRL 8
+# define CRL_REASON_PRIVILEGE_WITHDRAWN 9
+# define CRL_REASON_AA_COMPROMISE 10
+
+struct DIST_POINT_st {
+    DIST_POINT_NAME *distpoint;
+    ASN1_BIT_STRING *reasons;
+    GENERAL_NAMES *CRLissuer;
+    int dp_reasons;
+};
+
+SKM_DEFINE_STACK_OF_INTERNAL(DIST_POINT, DIST_POINT, DIST_POINT)
+#define sk_DIST_POINT_num(sk) OPENSSL_sk_num(ossl_check_const_DIST_POINT_sk_type(sk))
+#define sk_DIST_POINT_value(sk, idx) ((DIST_POINT *)OPENSSL_sk_value(ossl_check_const_DIST_POINT_sk_type(sk), (idx)))
+#define sk_DIST_POINT_new(cmp) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new(ossl_check_DIST_POINT_compfunc_type(cmp)))
+#define sk_DIST_POINT_new_null() ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new_null())
+#define sk_DIST_POINT_new_reserve(cmp, n) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new_reserve(ossl_check_DIST_POINT_compfunc_type(cmp), (n)))
+#define sk_DIST_POINT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_DIST_POINT_sk_type(sk), (n))
+#define sk_DIST_POINT_free(sk) OPENSSL_sk_free(ossl_check_DIST_POINT_sk_type(sk))
+#define sk_DIST_POINT_zero(sk) OPENSSL_sk_zero(ossl_check_DIST_POINT_sk_type(sk))
+#define sk_DIST_POINT_delete(sk, i) ((DIST_POINT *)OPENSSL_sk_delete(ossl_check_DIST_POINT_sk_type(sk), (i)))
+#define sk_DIST_POINT_delete_ptr(sk, ptr) ((DIST_POINT *)OPENSSL_sk_delete_ptr(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)))
+#define sk_DIST_POINT_push(sk, ptr) OPENSSL_sk_push(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr))
+#define sk_DIST_POINT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr))
+#define sk_DIST_POINT_pop(sk) ((DIST_POINT *)OPENSSL_sk_pop(ossl_check_DIST_POINT_sk_type(sk)))
+#define sk_DIST_POINT_shift(sk) ((DIST_POINT *)OPENSSL_sk_shift(ossl_check_DIST_POINT_sk_type(sk)))
+#define sk_DIST_POINT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_DIST_POINT_sk_type(sk),ossl_check_DIST_POINT_freefunc_type(freefunc))
+#define sk_DIST_POINT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr), (idx))
+#define sk_DIST_POINT_set(sk, idx, ptr) ((DIST_POINT *)OPENSSL_sk_set(ossl_check_DIST_POINT_sk_type(sk), (idx), ossl_check_DIST_POINT_type(ptr)))
+#define sk_DIST_POINT_find(sk, ptr) OPENSSL_sk_find(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr))
+#define sk_DIST_POINT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr))
+#define sk_DIST_POINT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr), pnum)
+#define sk_DIST_POINT_sort(sk) OPENSSL_sk_sort(ossl_check_DIST_POINT_sk_type(sk))
+#define sk_DIST_POINT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_DIST_POINT_sk_type(sk))
+#define sk_DIST_POINT_dup(sk) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_dup(ossl_check_const_DIST_POINT_sk_type(sk)))
+#define sk_DIST_POINT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_deep_copy(ossl_check_const_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_copyfunc_type(copyfunc), ossl_check_DIST_POINT_freefunc_type(freefunc)))
+#define sk_DIST_POINT_set_cmp_func(sk, cmp) ((sk_DIST_POINT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_compfunc_type(cmp)))
+
+
+typedef STACK_OF(DIST_POINT) CRL_DIST_POINTS;
+
+struct AUTHORITY_KEYID_st {
+    ASN1_OCTET_STRING *keyid;
+    GENERAL_NAMES *issuer;
+    ASN1_INTEGER *serial;
+};
+
+/* Strong extranet structures */
+
+typedef struct SXNET_ID_st {
+    ASN1_INTEGER *zone;
+    ASN1_OCTET_STRING *user;
+} SXNETID;
+
+SKM_DEFINE_STACK_OF_INTERNAL(SXNETID, SXNETID, SXNETID)
+#define sk_SXNETID_num(sk) OPENSSL_sk_num(ossl_check_const_SXNETID_sk_type(sk))
+#define sk_SXNETID_value(sk, idx) ((SXNETID *)OPENSSL_sk_value(ossl_check_const_SXNETID_sk_type(sk), (idx)))
+#define sk_SXNETID_new(cmp) ((STACK_OF(SXNETID) *)OPENSSL_sk_new(ossl_check_SXNETID_compfunc_type(cmp)))
+#define sk_SXNETID_new_null() ((STACK_OF(SXNETID) *)OPENSSL_sk_new_null())
+#define sk_SXNETID_new_reserve(cmp, n) ((STACK_OF(SXNETID) *)OPENSSL_sk_new_reserve(ossl_check_SXNETID_compfunc_type(cmp), (n)))
+#define sk_SXNETID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SXNETID_sk_type(sk), (n))
+#define sk_SXNETID_free(sk) OPENSSL_sk_free(ossl_check_SXNETID_sk_type(sk))
+#define sk_SXNETID_zero(sk) OPENSSL_sk_zero(ossl_check_SXNETID_sk_type(sk))
+#define sk_SXNETID_delete(sk, i) ((SXNETID *)OPENSSL_sk_delete(ossl_check_SXNETID_sk_type(sk), (i)))
+#define sk_SXNETID_delete_ptr(sk, ptr) ((SXNETID *)OPENSSL_sk_delete_ptr(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)))
+#define sk_SXNETID_push(sk, ptr) OPENSSL_sk_push(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr))
+#define sk_SXNETID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr))
+#define sk_SXNETID_pop(sk) ((SXNETID *)OPENSSL_sk_pop(ossl_check_SXNETID_sk_type(sk)))
+#define sk_SXNETID_shift(sk) ((SXNETID *)OPENSSL_sk_shift(ossl_check_SXNETID_sk_type(sk)))
+#define sk_SXNETID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SXNETID_sk_type(sk),ossl_check_SXNETID_freefunc_type(freefunc))
+#define sk_SXNETID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr), (idx))
+#define sk_SXNETID_set(sk, idx, ptr) ((SXNETID *)OPENSSL_sk_set(ossl_check_SXNETID_sk_type(sk), (idx), ossl_check_SXNETID_type(ptr)))
+#define sk_SXNETID_find(sk, ptr) OPENSSL_sk_find(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr))
+#define sk_SXNETID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr))
+#define sk_SXNETID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr), pnum)
+#define sk_SXNETID_sort(sk) OPENSSL_sk_sort(ossl_check_SXNETID_sk_type(sk))
+#define sk_SXNETID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SXNETID_sk_type(sk))
+#define sk_SXNETID_dup(sk) ((STACK_OF(SXNETID) *)OPENSSL_sk_dup(ossl_check_const_SXNETID_sk_type(sk)))
+#define sk_SXNETID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SXNETID) *)OPENSSL_sk_deep_copy(ossl_check_const_SXNETID_sk_type(sk), ossl_check_SXNETID_copyfunc_type(copyfunc), ossl_check_SXNETID_freefunc_type(freefunc)))
+#define sk_SXNETID_set_cmp_func(sk, cmp) ((sk_SXNETID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_compfunc_type(cmp)))
+
+
+
+typedef struct SXNET_st {
+    ASN1_INTEGER *version;
+    STACK_OF(SXNETID) *ids;
+} SXNET;
+
+typedef struct ISSUER_SIGN_TOOL_st {
+    ASN1_UTF8STRING *signTool;
+    ASN1_UTF8STRING *cATool;
+    ASN1_UTF8STRING *signToolCert;
+    ASN1_UTF8STRING *cAToolCert;
+} ISSUER_SIGN_TOOL;
+
+typedef struct NOTICEREF_st {
+    ASN1_STRING *organization;
+    STACK_OF(ASN1_INTEGER) *noticenos;
+} NOTICEREF;
+
+typedef struct USERNOTICE_st {
+    NOTICEREF *noticeref;
+    ASN1_STRING *exptext;
+} USERNOTICE;
+
+typedef struct POLICYQUALINFO_st {
+    ASN1_OBJECT *pqualid;
+    union {
+        ASN1_IA5STRING *cpsuri;
+        USERNOTICE *usernotice;
+        ASN1_TYPE *other;
+    } d;
+} POLICYQUALINFO;
+
+SKM_DEFINE_STACK_OF_INTERNAL(POLICYQUALINFO, POLICYQUALINFO, POLICYQUALINFO)
+#define sk_POLICYQUALINFO_num(sk) OPENSSL_sk_num(ossl_check_const_POLICYQUALINFO_sk_type(sk))
+#define sk_POLICYQUALINFO_value(sk, idx) ((POLICYQUALINFO *)OPENSSL_sk_value(ossl_check_const_POLICYQUALINFO_sk_type(sk), (idx)))
+#define sk_POLICYQUALINFO_new(cmp) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new(ossl_check_POLICYQUALINFO_compfunc_type(cmp)))
+#define sk_POLICYQUALINFO_new_null() ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new_null())
+#define sk_POLICYQUALINFO_new_reserve(cmp, n) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new_reserve(ossl_check_POLICYQUALINFO_compfunc_type(cmp), (n)))
+#define sk_POLICYQUALINFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICYQUALINFO_sk_type(sk), (n))
+#define sk_POLICYQUALINFO_free(sk) OPENSSL_sk_free(ossl_check_POLICYQUALINFO_sk_type(sk))
+#define sk_POLICYQUALINFO_zero(sk) OPENSSL_sk_zero(ossl_check_POLICYQUALINFO_sk_type(sk))
+#define sk_POLICYQUALINFO_delete(sk, i) ((POLICYQUALINFO *)OPENSSL_sk_delete(ossl_check_POLICYQUALINFO_sk_type(sk), (i)))
+#define sk_POLICYQUALINFO_delete_ptr(sk, ptr) ((POLICYQUALINFO *)OPENSSL_sk_delete_ptr(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)))
+#define sk_POLICYQUALINFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr))
+#define sk_POLICYQUALINFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr))
+#define sk_POLICYQUALINFO_pop(sk) ((POLICYQUALINFO *)OPENSSL_sk_pop(ossl_check_POLICYQUALINFO_sk_type(sk)))
+#define sk_POLICYQUALINFO_shift(sk) ((POLICYQUALINFO *)OPENSSL_sk_shift(ossl_check_POLICYQUALINFO_sk_type(sk)))
+#define sk_POLICYQUALINFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_POLICYQUALINFO_sk_type(sk),ossl_check_POLICYQUALINFO_freefunc_type(freefunc))
+#define sk_POLICYQUALINFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr), (idx))
+#define sk_POLICYQUALINFO_set(sk, idx, ptr) ((POLICYQUALINFO *)OPENSSL_sk_set(ossl_check_POLICYQUALINFO_sk_type(sk), (idx), ossl_check_POLICYQUALINFO_type(ptr)))
+#define sk_POLICYQUALINFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr))
+#define sk_POLICYQUALINFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr))
+#define sk_POLICYQUALINFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr), pnum)
+#define sk_POLICYQUALINFO_sort(sk) OPENSSL_sk_sort(ossl_check_POLICYQUALINFO_sk_type(sk))
+#define sk_POLICYQUALINFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICYQUALINFO_sk_type(sk))
+#define sk_POLICYQUALINFO_dup(sk) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_dup(ossl_check_const_POLICYQUALINFO_sk_type(sk)))
+#define sk_POLICYQUALINFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_copyfunc_type(copyfunc), ossl_check_POLICYQUALINFO_freefunc_type(freefunc)))
+#define sk_POLICYQUALINFO_set_cmp_func(sk, cmp) ((sk_POLICYQUALINFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_compfunc_type(cmp)))
+
+
+
+typedef struct POLICYINFO_st {
+    ASN1_OBJECT *policyid;
+    STACK_OF(POLICYQUALINFO) *qualifiers;
+} POLICYINFO;
+
+SKM_DEFINE_STACK_OF_INTERNAL(POLICYINFO, POLICYINFO, POLICYINFO)
+#define sk_POLICYINFO_num(sk) OPENSSL_sk_num(ossl_check_const_POLICYINFO_sk_type(sk))
+#define sk_POLICYINFO_value(sk, idx) ((POLICYINFO *)OPENSSL_sk_value(ossl_check_const_POLICYINFO_sk_type(sk), (idx)))
+#define sk_POLICYINFO_new(cmp) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new(ossl_check_POLICYINFO_compfunc_type(cmp)))
+#define sk_POLICYINFO_new_null() ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new_null())
+#define sk_POLICYINFO_new_reserve(cmp, n) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new_reserve(ossl_check_POLICYINFO_compfunc_type(cmp), (n)))
+#define sk_POLICYINFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICYINFO_sk_type(sk), (n))
+#define sk_POLICYINFO_free(sk) OPENSSL_sk_free(ossl_check_POLICYINFO_sk_type(sk))
+#define sk_POLICYINFO_zero(sk) OPENSSL_sk_zero(ossl_check_POLICYINFO_sk_type(sk))
+#define sk_POLICYINFO_delete(sk, i) ((POLICYINFO *)OPENSSL_sk_delete(ossl_check_POLICYINFO_sk_type(sk), (i)))
+#define sk_POLICYINFO_delete_ptr(sk, ptr) ((POLICYINFO *)OPENSSL_sk_delete_ptr(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)))
+#define sk_POLICYINFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr))
+#define sk_POLICYINFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr))
+#define sk_POLICYINFO_pop(sk) ((POLICYINFO *)OPENSSL_sk_pop(ossl_check_POLICYINFO_sk_type(sk)))
+#define sk_POLICYINFO_shift(sk) ((POLICYINFO *)OPENSSL_sk_shift(ossl_check_POLICYINFO_sk_type(sk)))
+#define sk_POLICYINFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_POLICYINFO_sk_type(sk),ossl_check_POLICYINFO_freefunc_type(freefunc))
+#define sk_POLICYINFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr), (idx))
+#define sk_POLICYINFO_set(sk, idx, ptr) ((POLICYINFO *)OPENSSL_sk_set(ossl_check_POLICYINFO_sk_type(sk), (idx), ossl_check_POLICYINFO_type(ptr)))
+#define sk_POLICYINFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr))
+#define sk_POLICYINFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr))
+#define sk_POLICYINFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr), pnum)
+#define sk_POLICYINFO_sort(sk) OPENSSL_sk_sort(ossl_check_POLICYINFO_sk_type(sk))
+#define sk_POLICYINFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICYINFO_sk_type(sk))
+#define sk_POLICYINFO_dup(sk) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_dup(ossl_check_const_POLICYINFO_sk_type(sk)))
+#define sk_POLICYINFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_copyfunc_type(copyfunc), ossl_check_POLICYINFO_freefunc_type(freefunc)))
+#define sk_POLICYINFO_set_cmp_func(sk, cmp) ((sk_POLICYINFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_compfunc_type(cmp)))
+
+
+typedef STACK_OF(POLICYINFO) CERTIFICATEPOLICIES;
+
+typedef struct POLICY_MAPPING_st {
+    ASN1_OBJECT *issuerDomainPolicy;
+    ASN1_OBJECT *subjectDomainPolicy;
+} POLICY_MAPPING;
+
+SKM_DEFINE_STACK_OF_INTERNAL(POLICY_MAPPING, POLICY_MAPPING, POLICY_MAPPING)
+#define sk_POLICY_MAPPING_num(sk) OPENSSL_sk_num(ossl_check_const_POLICY_MAPPING_sk_type(sk))
+#define sk_POLICY_MAPPING_value(sk, idx) ((POLICY_MAPPING *)OPENSSL_sk_value(ossl_check_const_POLICY_MAPPING_sk_type(sk), (idx)))
+#define sk_POLICY_MAPPING_new(cmp) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new(ossl_check_POLICY_MAPPING_compfunc_type(cmp)))
+#define sk_POLICY_MAPPING_new_null() ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new_null())
+#define sk_POLICY_MAPPING_new_reserve(cmp, n) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new_reserve(ossl_check_POLICY_MAPPING_compfunc_type(cmp), (n)))
+#define sk_POLICY_MAPPING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICY_MAPPING_sk_type(sk), (n))
+#define sk_POLICY_MAPPING_free(sk) OPENSSL_sk_free(ossl_check_POLICY_MAPPING_sk_type(sk))
+#define sk_POLICY_MAPPING_zero(sk) OPENSSL_sk_zero(ossl_check_POLICY_MAPPING_sk_type(sk))
+#define sk_POLICY_MAPPING_delete(sk, i) ((POLICY_MAPPING *)OPENSSL_sk_delete(ossl_check_POLICY_MAPPING_sk_type(sk), (i)))
+#define sk_POLICY_MAPPING_delete_ptr(sk, ptr) ((POLICY_MAPPING *)OPENSSL_sk_delete_ptr(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)))
+#define sk_POLICY_MAPPING_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr))
+#define sk_POLICY_MAPPING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr))
+#define sk_POLICY_MAPPING_pop(sk) ((POLICY_MAPPING *)OPENSSL_sk_pop(ossl_check_POLICY_MAPPING_sk_type(sk)))
+#define sk_POLICY_MAPPING_shift(sk) ((POLICY_MAPPING *)OPENSSL_sk_shift(ossl_check_POLICY_MAPPING_sk_type(sk)))
+#define sk_POLICY_MAPPING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_POLICY_MAPPING_sk_type(sk),ossl_check_POLICY_MAPPING_freefunc_type(freefunc))
+#define sk_POLICY_MAPPING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr), (idx))
+#define sk_POLICY_MAPPING_set(sk, idx, ptr) ((POLICY_MAPPING *)OPENSSL_sk_set(ossl_check_POLICY_MAPPING_sk_type(sk), (idx), ossl_check_POLICY_MAPPING_type(ptr)))
+#define sk_POLICY_MAPPING_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr))
+#define sk_POLICY_MAPPING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr))
+#define sk_POLICY_MAPPING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr), pnum)
+#define sk_POLICY_MAPPING_sort(sk) OPENSSL_sk_sort(ossl_check_POLICY_MAPPING_sk_type(sk))
+#define sk_POLICY_MAPPING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICY_MAPPING_sk_type(sk))
+#define sk_POLICY_MAPPING_dup(sk) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_dup(ossl_check_const_POLICY_MAPPING_sk_type(sk)))
+#define sk_POLICY_MAPPING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_copyfunc_type(copyfunc), ossl_check_POLICY_MAPPING_freefunc_type(freefunc)))
+#define sk_POLICY_MAPPING_set_cmp_func(sk, cmp) ((sk_POLICY_MAPPING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_compfunc_type(cmp)))
+
+
+typedef STACK_OF(POLICY_MAPPING) POLICY_MAPPINGS;
+
+typedef struct GENERAL_SUBTREE_st {
+    GENERAL_NAME *base;
+    ASN1_INTEGER *minimum;
+    ASN1_INTEGER *maximum;
+} GENERAL_SUBTREE;
+
+SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_SUBTREE, GENERAL_SUBTREE, GENERAL_SUBTREE)
+#define sk_GENERAL_SUBTREE_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_SUBTREE_sk_type(sk))
+#define sk_GENERAL_SUBTREE_value(sk, idx) ((GENERAL_SUBTREE *)OPENSSL_sk_value(ossl_check_const_GENERAL_SUBTREE_sk_type(sk), (idx)))
+#define sk_GENERAL_SUBTREE_new(cmp) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new(ossl_check_GENERAL_SUBTREE_compfunc_type(cmp)))
+#define sk_GENERAL_SUBTREE_new_null() ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new_null())
+#define sk_GENERAL_SUBTREE_new_reserve(cmp, n) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_SUBTREE_compfunc_type(cmp), (n)))
+#define sk_GENERAL_SUBTREE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_SUBTREE_sk_type(sk), (n))
+#define sk_GENERAL_SUBTREE_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_SUBTREE_sk_type(sk))
+#define sk_GENERAL_SUBTREE_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_SUBTREE_sk_type(sk))
+#define sk_GENERAL_SUBTREE_delete(sk, i) ((GENERAL_SUBTREE *)OPENSSL_sk_delete(ossl_check_GENERAL_SUBTREE_sk_type(sk), (i)))
+#define sk_GENERAL_SUBTREE_delete_ptr(sk, ptr) ((GENERAL_SUBTREE *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)))
+#define sk_GENERAL_SUBTREE_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr))
+#define sk_GENERAL_SUBTREE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr))
+#define sk_GENERAL_SUBTREE_pop(sk) ((GENERAL_SUBTREE *)OPENSSL_sk_pop(ossl_check_GENERAL_SUBTREE_sk_type(sk)))
+#define sk_GENERAL_SUBTREE_shift(sk) ((GENERAL_SUBTREE *)OPENSSL_sk_shift(ossl_check_GENERAL_SUBTREE_sk_type(sk)))
+#define sk_GENERAL_SUBTREE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_SUBTREE_sk_type(sk),ossl_check_GENERAL_SUBTREE_freefunc_type(freefunc))
+#define sk_GENERAL_SUBTREE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr), (idx))
+#define sk_GENERAL_SUBTREE_set(sk, idx, ptr) ((GENERAL_SUBTREE *)OPENSSL_sk_set(ossl_check_GENERAL_SUBTREE_sk_type(sk), (idx), ossl_check_GENERAL_SUBTREE_type(ptr)))
+#define sk_GENERAL_SUBTREE_find(sk, ptr) OPENSSL_sk_find(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr))
+#define sk_GENERAL_SUBTREE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr))
+#define sk_GENERAL_SUBTREE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr), pnum)
+#define sk_GENERAL_SUBTREE_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_SUBTREE_sk_type(sk))
+#define sk_GENERAL_SUBTREE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_SUBTREE_sk_type(sk))
+#define sk_GENERAL_SUBTREE_dup(sk) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_SUBTREE_sk_type(sk)))
+#define sk_GENERAL_SUBTREE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_copyfunc_type(copyfunc), ossl_check_GENERAL_SUBTREE_freefunc_type(freefunc)))
+#define sk_GENERAL_SUBTREE_set_cmp_func(sk, cmp) ((sk_GENERAL_SUBTREE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_compfunc_type(cmp)))
+
+
+struct NAME_CONSTRAINTS_st {
+    STACK_OF(GENERAL_SUBTREE) *permittedSubtrees;
+    STACK_OF(GENERAL_SUBTREE) *excludedSubtrees;
+};
+
+typedef struct POLICY_CONSTRAINTS_st {
+    ASN1_INTEGER *requireExplicitPolicy;
+    ASN1_INTEGER *inhibitPolicyMapping;
+} POLICY_CONSTRAINTS;
+
+/* Proxy certificate structures, see RFC 3820 */
+typedef struct PROXY_POLICY_st {
+    ASN1_OBJECT *policyLanguage;
+    ASN1_OCTET_STRING *policy;
+} PROXY_POLICY;
+
+typedef struct PROXY_CERT_INFO_EXTENSION_st {
+    ASN1_INTEGER *pcPathLengthConstraint;
+    PROXY_POLICY *proxyPolicy;
+} PROXY_CERT_INFO_EXTENSION;
+
+DECLARE_ASN1_FUNCTIONS(PROXY_POLICY)
+DECLARE_ASN1_FUNCTIONS(PROXY_CERT_INFO_EXTENSION)
+
+struct ISSUING_DIST_POINT_st {
+    DIST_POINT_NAME *distpoint;
+    int onlyuser;
+    int onlyCA;
+    ASN1_BIT_STRING *onlysomereasons;
+    int indirectCRL;
+    int onlyattr;
+};
+
+/* Values in idp_flags field */
+/* IDP present */
+# define IDP_PRESENT 0x1
+/* IDP values inconsistent */
+# define IDP_INVALID 0x2
+/* onlyuser true */
+# define IDP_ONLYUSER 0x4
+/* onlyCA true */
+# define IDP_ONLYCA 0x8
+/* onlyattr true */
+# define IDP_ONLYATTR 0x10
+/* indirectCRL true */
+# define IDP_INDIRECT 0x20
+/* onlysomereasons present */
+# define IDP_REASONS 0x40
+
+# define X509V3_conf_err(val) ERR_add_error_data(6, \
+                        "section:", (val)->section, \
+                        ",name:", (val)->name, ",value:", (val)->value)
+
+# define X509V3_set_ctx_test(ctx) \
+    X509V3_set_ctx(ctx, NULL, NULL, NULL, NULL, X509V3_CTX_TEST)
+# define X509V3_set_ctx_nodb(ctx) (ctx)->db = NULL;
+
+# define EXT_BITSTRING(nid, table) { nid, 0, ASN1_ITEM_ref(ASN1_BIT_STRING), \
+                        0,0,0,0, \
+                        0,0, \
+                        (X509V3_EXT_I2V)i2v_ASN1_BIT_STRING, \
+                        (X509V3_EXT_V2I)v2i_ASN1_BIT_STRING, \
+                        NULL, NULL, \
+                        table}
+
+# define EXT_IA5STRING(nid) { nid, 0, ASN1_ITEM_ref(ASN1_IA5STRING), \
+                        0,0,0,0, \
+                        (X509V3_EXT_I2S)i2s_ASN1_IA5STRING, \
+                        (X509V3_EXT_S2I)s2i_ASN1_IA5STRING, \
+                        0,0,0,0, \
+                        NULL}
+
+#define EXT_UTF8STRING(nid) { nid, 0, ASN1_ITEM_ref(ASN1_UTF8STRING), \
+                        0,0,0,0, \
+                        (X509V3_EXT_I2S)i2s_ASN1_UTF8STRING, \
+                        (X509V3_EXT_S2I)s2i_ASN1_UTF8STRING, \
+                        0,0,0,0, \
+                        NULL}
+
+# define EXT_END { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+/* X509_PURPOSE stuff */
+
+# define EXFLAG_BCONS 0x1
+# define EXFLAG_KUSAGE 0x2
+# define EXFLAG_XKUSAGE 0x4
+# define EXFLAG_NSCERT 0x8
+
+# define EXFLAG_CA 0x10
+# define EXFLAG_SI 0x20 /* self-issued, maybe not self-signed */
+# define EXFLAG_V1 0x40
+# define EXFLAG_INVALID 0x80
+/* EXFLAG_SET is set to indicate that some values have been precomputed */
+# define EXFLAG_SET 0x100
+# define EXFLAG_CRITICAL 0x200
+# define EXFLAG_PROXY 0x400
+
+# define EXFLAG_INVALID_POLICY 0x800
+# define EXFLAG_FRESHEST 0x1000
+# define EXFLAG_SS 0x2000 /* cert is apparently self-signed */
+
+# define EXFLAG_BCONS_CRITICAL 0x10000
+# define EXFLAG_AKID_CRITICAL 0x20000
+# define EXFLAG_SKID_CRITICAL 0x40000
+# define EXFLAG_SAN_CRITICAL 0x80000
+# define EXFLAG_NO_FINGERPRINT 0x100000
+
+# define KU_DIGITAL_SIGNATURE 0x0080
+# define KU_NON_REPUDIATION 0x0040
+# define KU_KEY_ENCIPHERMENT 0x0020
+# define KU_DATA_ENCIPHERMENT 0x0010
+# define KU_KEY_AGREEMENT 0x0008
+# define KU_KEY_CERT_SIGN 0x0004
+# define KU_CRL_SIGN 0x0002
+# define KU_ENCIPHER_ONLY 0x0001
+# define KU_DECIPHER_ONLY 0x8000
+
+# define NS_SSL_CLIENT 0x80
+# define NS_SSL_SERVER 0x40
+# define NS_SMIME 0x20
+# define NS_OBJSIGN 0x10
+# define NS_SSL_CA 0x04
+# define NS_SMIME_CA 0x02
+# define NS_OBJSIGN_CA 0x01
+# define NS_ANY_CA (NS_SSL_CA|NS_SMIME_CA|NS_OBJSIGN_CA)
+
+# define XKU_SSL_SERVER 0x1
+# define XKU_SSL_CLIENT 0x2
+# define XKU_SMIME 0x4
+# define XKU_CODE_SIGN 0x8
+# define XKU_SGC 0x10 /* Netscape or MS Server-Gated Crypto */
+# define XKU_OCSP_SIGN 0x20
+# define XKU_TIMESTAMP 0x40
+# define XKU_DVCS 0x80
+# define XKU_ANYEKU 0x100
+
+# define X509_PURPOSE_DYNAMIC 0x1
+# define X509_PURPOSE_DYNAMIC_NAME 0x2
+
+typedef struct x509_purpose_st {
+    int purpose;
+    int trust; /* Default trust ID */
+    int flags;
+    int (*check_purpose) (const struct x509_purpose_st *, const X509 *, int);
+    char *name;
+    char *sname;
+    void *usr_data;
+} X509_PURPOSE;
+
+SKM_DEFINE_STACK_OF_INTERNAL(X509_PURPOSE, X509_PURPOSE, X509_PURPOSE)
+#define sk_X509_PURPOSE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_PURPOSE_sk_type(sk))
+#define sk_X509_PURPOSE_value(sk, idx) ((X509_PURPOSE *)OPENSSL_sk_value(ossl_check_const_X509_PURPOSE_sk_type(sk), (idx)))
+#define sk_X509_PURPOSE_new(cmp) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new(ossl_check_X509_PURPOSE_compfunc_type(cmp)))
+#define sk_X509_PURPOSE_new_null() ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new_null())
+#define sk_X509_PURPOSE_new_reserve(cmp, n) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new_reserve(ossl_check_X509_PURPOSE_compfunc_type(cmp), (n)))
+#define sk_X509_PURPOSE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_PURPOSE_sk_type(sk), (n))
+#define sk_X509_PURPOSE_free(sk) OPENSSL_sk_free(ossl_check_X509_PURPOSE_sk_type(sk))
+#define sk_X509_PURPOSE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_PURPOSE_sk_type(sk))
+#define sk_X509_PURPOSE_delete(sk, i) ((X509_PURPOSE *)OPENSSL_sk_delete(ossl_check_X509_PURPOSE_sk_type(sk), (i)))
+#define sk_X509_PURPOSE_delete_ptr(sk, ptr) ((X509_PURPOSE *)OPENSSL_sk_delete_ptr(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)))
+#define sk_X509_PURPOSE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr))
+#define sk_X509_PURPOSE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr))
+#define sk_X509_PURPOSE_pop(sk) ((X509_PURPOSE *)OPENSSL_sk_pop(ossl_check_X509_PURPOSE_sk_type(sk)))
+#define sk_X509_PURPOSE_shift(sk) ((X509_PURPOSE *)OPENSSL_sk_shift(ossl_check_X509_PURPOSE_sk_type(sk)))
+#define sk_X509_PURPOSE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_PURPOSE_sk_type(sk),ossl_check_X509_PURPOSE_freefunc_type(freefunc))
+#define sk_X509_PURPOSE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr), (idx))
+#define sk_X509_PURPOSE_set(sk, idx, ptr) ((X509_PURPOSE *)OPENSSL_sk_set(ossl_check_X509_PURPOSE_sk_type(sk), (idx), ossl_check_X509_PURPOSE_type(ptr)))
+#define sk_X509_PURPOSE_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr))
+#define sk_X509_PURPOSE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr))
+#define sk_X509_PURPOSE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr), pnum)
+#define sk_X509_PURPOSE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_PURPOSE_sk_type(sk))
+#define sk_X509_PURPOSE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_PURPOSE_sk_type(sk))
+#define sk_X509_PURPOSE_dup(sk) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_dup(ossl_check_const_X509_PURPOSE_sk_type(sk)))
+#define sk_X509_PURPOSE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_copyfunc_type(copyfunc), ossl_check_X509_PURPOSE_freefunc_type(freefunc)))
+#define sk_X509_PURPOSE_set_cmp_func(sk, cmp) ((sk_X509_PURPOSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_compfunc_type(cmp)))
+
+
+
+# define X509_PURPOSE_SSL_CLIENT 1
+# define X509_PURPOSE_SSL_SERVER 2
+# define X509_PURPOSE_NS_SSL_SERVER 3
+# define X509_PURPOSE_SMIME_SIGN 4
+# define X509_PURPOSE_SMIME_ENCRYPT 5
+# define X509_PURPOSE_CRL_SIGN 6
+# define X509_PURPOSE_ANY 7
+# define X509_PURPOSE_OCSP_HELPER 8
+# define X509_PURPOSE_TIMESTAMP_SIGN 9
+# define X509_PURPOSE_CODE_SIGN 10
+
+# define X509_PURPOSE_MIN 1
+# define X509_PURPOSE_MAX 10
+
+/* Flags for X509V3_EXT_print() */
+
+# define X509V3_EXT_UNKNOWN_MASK (0xfL << 16)
+/* Return error for unknown extensions */
+# define X509V3_EXT_DEFAULT 0
+/* Print error for unknown extensions */
+# define X509V3_EXT_ERROR_UNKNOWN (1L << 16)
+/* ASN1 parse unknown extensions */
+# define X509V3_EXT_PARSE_UNKNOWN (2L << 16)
+/* BIO_dump unknown extensions */
+# define X509V3_EXT_DUMP_UNKNOWN (3L << 16)
+
+/* Flags for X509V3_add1_i2d */
+
+# define X509V3_ADD_OP_MASK 0xfL
+# define X509V3_ADD_DEFAULT 0L
+# define X509V3_ADD_APPEND 1L
+# define X509V3_ADD_REPLACE 2L
+# define X509V3_ADD_REPLACE_EXISTING 3L
+# define X509V3_ADD_KEEP_EXISTING 4L
+# define X509V3_ADD_DELETE 5L
+# define X509V3_ADD_SILENT 0x10
+
+DECLARE_ASN1_FUNCTIONS(BASIC_CONSTRAINTS)
+
+DECLARE_ASN1_FUNCTIONS(SXNET)
+DECLARE_ASN1_FUNCTIONS(SXNETID)
+
+DECLARE_ASN1_FUNCTIONS(ISSUER_SIGN_TOOL)
+
+int SXNET_add_id_asc(SXNET **psx, const char *zone, const char *user, int userlen);
+int SXNET_add_id_ulong(SXNET **psx, unsigned long lzone, const char *user,
+                       int userlen);
+int SXNET_add_id_INTEGER(SXNET **psx, ASN1_INTEGER *izone, const char *user,
+                         int userlen);
+
+ASN1_OCTET_STRING *SXNET_get_id_asc(SXNET *sx, const char *zone);
+ASN1_OCTET_STRING *SXNET_get_id_ulong(SXNET *sx, unsigned long lzone);
+ASN1_OCTET_STRING *SXNET_get_id_INTEGER(SXNET *sx, ASN1_INTEGER *zone);
+
+DECLARE_ASN1_FUNCTIONS(AUTHORITY_KEYID)
+
+DECLARE_ASN1_FUNCTIONS(PKEY_USAGE_PERIOD)
+
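As an aside, the purpose and key-usage accessors declared in this header (X509_check_purpose(), X509_get_extension_flags(), X509_get_key_usage()) are the usual entry points for asking what a certificate may be used for. Below is a minimal usage sketch against the public OpenSSL 3.x API; the certificate file name and the bare-bones error handling are illustrative assumptions, not anything this header mandates:

    #include <stdio.h>
    #include <openssl/pem.h>
    #include <openssl/x509v3.h>

    int main(void)
    {
        /* "cert.pem" is a hypothetical input file for this sketch. */
        FILE *fp = fopen("cert.pem", "r");
        X509 *cert = fp ? PEM_read_X509(fp, NULL, NULL, NULL) : NULL;
        if (fp != NULL)
            fclose(fp);
        if (cert == NULL)
            return 1;

        /* X509_check_purpose() returns 1 when the purpose check passes;
         * as a side effect it populates the cached EXFLAG and KU bits
         * that the defines above describe. */
        if (X509_check_purpose(cert, X509_PURPOSE_SSL_SERVER, 0) == 1)
            printf("usable as a TLS server certificate\n");

        /* The KU_ bits are only meaningful when the keyUsage extension
         * is present, which EXFLAG_KUSAGE signals. */
        if ((X509_get_extension_flags(cert) & EXFLAG_KUSAGE) != 0
                && (X509_get_key_usage(cert) & KU_DIGITAL_SIGNATURE) != 0)
            printf("keyUsage permits digitalSignature\n");

        X509_free(cert);
        return 0;
    }
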
+DECLARE_ASN1_FUNCTIONS(GENERAL_NAME)
+DECLARE_ASN1_DUP_FUNCTION(GENERAL_NAME)
+int GENERAL_NAME_cmp(GENERAL_NAME *a, GENERAL_NAME *b);
+
+ASN1_BIT_STRING *v2i_ASN1_BIT_STRING(X509V3_EXT_METHOD *method,
+                                     X509V3_CTX *ctx,
+                                     STACK_OF(CONF_VALUE) *nval);
+STACK_OF(CONF_VALUE) *i2v_ASN1_BIT_STRING(X509V3_EXT_METHOD *method,
+                                          ASN1_BIT_STRING *bits,
+                                          STACK_OF(CONF_VALUE) *extlist);
+char *i2s_ASN1_IA5STRING(X509V3_EXT_METHOD *method, ASN1_IA5STRING *ia5);
+ASN1_IA5STRING *s2i_ASN1_IA5STRING(X509V3_EXT_METHOD *method,
+                                   X509V3_CTX *ctx, const char *str);
+char *i2s_ASN1_UTF8STRING(X509V3_EXT_METHOD *method, ASN1_UTF8STRING *utf8);
+ASN1_UTF8STRING *s2i_ASN1_UTF8STRING(X509V3_EXT_METHOD *method,
+                                     X509V3_CTX *ctx, const char *str);
+
+STACK_OF(CONF_VALUE) *i2v_GENERAL_NAME(X509V3_EXT_METHOD *method,
+                                       GENERAL_NAME *gen,
+                                       STACK_OF(CONF_VALUE) *ret);
+int GENERAL_NAME_print(BIO *out, GENERAL_NAME *gen);
+
+DECLARE_ASN1_FUNCTIONS(GENERAL_NAMES)
+
+STACK_OF(CONF_VALUE) *i2v_GENERAL_NAMES(X509V3_EXT_METHOD *method,
+                                        GENERAL_NAMES *gen,
+                                        STACK_OF(CONF_VALUE) *extlist);
+GENERAL_NAMES *v2i_GENERAL_NAMES(const X509V3_EXT_METHOD *method,
+                                 X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *nval);
+
+DECLARE_ASN1_FUNCTIONS(OTHERNAME)
+DECLARE_ASN1_FUNCTIONS(EDIPARTYNAME)
+int OTHERNAME_cmp(OTHERNAME *a, OTHERNAME *b);
+void GENERAL_NAME_set0_value(GENERAL_NAME *a, int type, void *value);
+void *GENERAL_NAME_get0_value(const GENERAL_NAME *a, int *ptype);
+int GENERAL_NAME_set0_othername(GENERAL_NAME *gen,
+                                ASN1_OBJECT *oid, ASN1_TYPE *value);
+int GENERAL_NAME_get0_otherName(const GENERAL_NAME *gen,
+                                ASN1_OBJECT **poid, ASN1_TYPE **pvalue);
+
+char *i2s_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method,
+                            const ASN1_OCTET_STRING *ia5);
+ASN1_OCTET_STRING *s2i_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method,
+                                         X509V3_CTX *ctx, const char *str);
+
+DECLARE_ASN1_FUNCTIONS(EXTENDED_KEY_USAGE)
+int i2a_ACCESS_DESCRIPTION(BIO *bp, const ACCESS_DESCRIPTION *a);
+
+DECLARE_ASN1_ALLOC_FUNCTIONS(TLS_FEATURE)
+
+DECLARE_ASN1_FUNCTIONS(CERTIFICATEPOLICIES)
+DECLARE_ASN1_FUNCTIONS(POLICYINFO)
+DECLARE_ASN1_FUNCTIONS(POLICYQUALINFO)
+DECLARE_ASN1_FUNCTIONS(USERNOTICE)
+DECLARE_ASN1_FUNCTIONS(NOTICEREF)
+
+DECLARE_ASN1_FUNCTIONS(CRL_DIST_POINTS)
+DECLARE_ASN1_FUNCTIONS(DIST_POINT)
+DECLARE_ASN1_FUNCTIONS(DIST_POINT_NAME)
+DECLARE_ASN1_FUNCTIONS(ISSUING_DIST_POINT)
+
+int DIST_POINT_set_dpname(DIST_POINT_NAME *dpn, const X509_NAME *iname);
+
+int NAME_CONSTRAINTS_check(X509 *x, NAME_CONSTRAINTS *nc);
+int NAME_CONSTRAINTS_check_CN(X509 *x, NAME_CONSTRAINTS *nc);
+
+DECLARE_ASN1_FUNCTIONS(ACCESS_DESCRIPTION)
+DECLARE_ASN1_FUNCTIONS(AUTHORITY_INFO_ACCESS)
+
+DECLARE_ASN1_ITEM(POLICY_MAPPING)
+DECLARE_ASN1_ALLOC_FUNCTIONS(POLICY_MAPPING)
+DECLARE_ASN1_ITEM(POLICY_MAPPINGS)
+
+DECLARE_ASN1_ITEM(GENERAL_SUBTREE)
+DECLARE_ASN1_ALLOC_FUNCTIONS(GENERAL_SUBTREE)
+
+DECLARE_ASN1_ITEM(NAME_CONSTRAINTS)
+DECLARE_ASN1_ALLOC_FUNCTIONS(NAME_CONSTRAINTS)
+
+DECLARE_ASN1_ALLOC_FUNCTIONS(POLICY_CONSTRAINTS)
+DECLARE_ASN1_ITEM(POLICY_CONSTRAINTS)
+
+GENERAL_NAME *a2i_GENERAL_NAME(GENERAL_NAME *out,
+                               const X509V3_EXT_METHOD *method,
+                               X509V3_CTX *ctx, int gen_type,
+                               const char *value, int is_nc);
+
+# ifdef OPENSSL_CONF_H
+GENERAL_NAME *v2i_GENERAL_NAME(const X509V3_EXT_METHOD *method,
+                               X509V3_CTX *ctx, CONF_VALUE *cnf);
+GENERAL_NAME *v2i_GENERAL_NAME_ex(GENERAL_NAME *out,
+                                  const X509V3_EXT_METHOD *method,
+                                  X509V3_CTX *ctx, CONF_VALUE *cnf,
+                                  int is_nc);
+
+void X509V3_conf_free(CONF_VALUE *val);
+
+X509_EXTENSION *X509V3_EXT_nconf_nid(CONF *conf, X509V3_CTX *ctx, int ext_nid,
+                                     const char *value);
+X509_EXTENSION *X509V3_EXT_nconf(CONF *conf, X509V3_CTX *ctx, const char *name,
+                                 const char *value);
+int X509V3_EXT_add_nconf_sk(CONF *conf, X509V3_CTX *ctx, const char *section,
+                            STACK_OF(X509_EXTENSION) **sk);
+int X509V3_EXT_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section,
+                         X509 *cert);
+int X509V3_EXT_REQ_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section,
+                             X509_REQ *req);
+int X509V3_EXT_CRL_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section,
+                             X509_CRL *crl);
+
+X509_EXTENSION *X509V3_EXT_conf_nid(LHASH_OF(CONF_VALUE) *conf,
+                                    X509V3_CTX *ctx, int ext_nid,
+                                    const char *value);
+X509_EXTENSION *X509V3_EXT_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx,
+                                const char *name, const char *value);
+int X509V3_EXT_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx,
+                        const char *section, X509 *cert);
+int X509V3_EXT_REQ_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx,
+                            const char *section, X509_REQ *req);
+int X509V3_EXT_CRL_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx,
+                            const char *section, X509_CRL *crl);
+
+int X509V3_add_value_bool_nf(const char *name, int asn1_bool,
+                             STACK_OF(CONF_VALUE) **extlist);
+int X509V3_get_value_bool(const CONF_VALUE *value, int *asn1_bool);
+int X509V3_get_value_int(const CONF_VALUE *value, ASN1_INTEGER **aint);
+void X509V3_set_nconf(X509V3_CTX *ctx, CONF *conf);
+void X509V3_set_conf_lhash(X509V3_CTX *ctx, LHASH_OF(CONF_VALUE) *lhash);
+# endif
+
+char *X509V3_get_string(X509V3_CTX *ctx, const char *name, const char *section);
+STACK_OF(CONF_VALUE) *X509V3_get_section(X509V3_CTX *ctx, const char *section);
+void X509V3_string_free(X509V3_CTX *ctx, char *str);
+void X509V3_section_free(X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *section);
+void X509V3_set_ctx(X509V3_CTX *ctx, X509 *issuer, X509 *subject,
+                    X509_REQ *req, X509_CRL *crl, int flags);
+/* For API backward compatibility, this is separate from X509V3_set_ctx(): */
+int X509V3_set_issuer_pkey(X509V3_CTX *ctx, EVP_PKEY *pkey);
+
+int X509V3_add_value(const char *name, const char *value,
+                     STACK_OF(CONF_VALUE) **extlist);
+int X509V3_add_value_uchar(const char *name, const unsigned char *value,
+                           STACK_OF(CONF_VALUE) **extlist);
+int X509V3_add_value_bool(const char *name, int asn1_bool,
+                          STACK_OF(CONF_VALUE) **extlist);
+int X509V3_add_value_int(const char *name, const ASN1_INTEGER *aint,
+                         STACK_OF(CONF_VALUE) **extlist);
+char *i2s_ASN1_INTEGER(X509V3_EXT_METHOD *meth, const ASN1_INTEGER *aint);
+ASN1_INTEGER *s2i_ASN1_INTEGER(X509V3_EXT_METHOD *meth, const char *value);
+char *i2s_ASN1_ENUMERATED(X509V3_EXT_METHOD *meth, const ASN1_ENUMERATED *aint);
+char *i2s_ASN1_ENUMERATED_TABLE(X509V3_EXT_METHOD *meth,
+                                const ASN1_ENUMERATED *aint);
+int X509V3_EXT_add(X509V3_EXT_METHOD *ext);
+int X509V3_EXT_add_list(X509V3_EXT_METHOD *extlist);
+int X509V3_EXT_add_alias(int nid_to, int nid_from);
+void X509V3_EXT_cleanup(void);
+
+const X509V3_EXT_METHOD *X509V3_EXT_get(X509_EXTENSION *ext);
+const X509V3_EXT_METHOD *X509V3_EXT_get_nid(int nid);
+int X509V3_add_standard_extensions(void);
+STACK_OF(CONF_VALUE) *X509V3_parse_list(const char *line);
+void *X509V3_EXT_d2i(X509_EXTENSION *ext);
+void *X509V3_get_d2i(const STACK_OF(X509_EXTENSION) *x, int nid, int *crit,
+                     int *idx);
+
+X509_EXTENSION *X509V3_EXT_i2d(int ext_nid, int crit, void *ext_struc);
+int X509V3_add1_i2d(STACK_OF(X509_EXTENSION) **x, int nid, void *value,
+                    int crit, unsigned long flags);
+
+#ifndef OPENSSL_NO_DEPRECATED_1_1_0
+/* The new declarations are in crypto.h, but the old ones were here. */
+# define hex_to_string OPENSSL_buf2hexstr
+# define string_to_hex OPENSSL_hexstr2buf
+#endif
+
+void X509V3_EXT_val_prn(BIO *out, STACK_OF(CONF_VALUE) *val, int indent,
+                        int ml);
+int X509V3_EXT_print(BIO *out, X509_EXTENSION *ext, unsigned long flag,
+                     int indent);
+#ifndef OPENSSL_NO_STDIO
+int X509V3_EXT_print_fp(FILE *out, X509_EXTENSION *ext, int flag, int indent);
+#endif
+int X509V3_extensions_print(BIO *out, const char *title,
+                            const STACK_OF(X509_EXTENSION) *exts,
+                            unsigned long flag, int indent);
+
+int X509_check_ca(X509 *x);
+int X509_check_purpose(X509 *x, int id, int ca);
+int X509_supported_extension(X509_EXTENSION *ex);
+int X509_PURPOSE_set(int *p, int purpose);
+int X509_check_issued(X509 *issuer, X509 *subject);
+int X509_check_akid(const X509 *issuer, const AUTHORITY_KEYID *akid);
+void X509_set_proxy_flag(X509 *x);
+void X509_set_proxy_pathlen(X509 *x, long l);
+long X509_get_proxy_pathlen(X509 *x);
+
+uint32_t X509_get_extension_flags(X509 *x);
+uint32_t X509_get_key_usage(X509 *x);
+uint32_t X509_get_extended_key_usage(X509 *x);
+const ASN1_OCTET_STRING *X509_get0_subject_key_id(X509 *x);
+const ASN1_OCTET_STRING *X509_get0_authority_key_id(X509 *x);
+const GENERAL_NAMES *X509_get0_authority_issuer(X509 *x);
+const ASN1_INTEGER *X509_get0_authority_serial(X509 *x);
+
+int X509_PURPOSE_get_count(void);
+X509_PURPOSE *X509_PURPOSE_get0(int idx);
+int X509_PURPOSE_get_by_sname(const char *sname);
+int X509_PURPOSE_get_by_id(int id);
+int X509_PURPOSE_add(int id, int trust, int flags,
+                     int (*ck) (const X509_PURPOSE *, const X509 *, int),
+                     const char *name, const char *sname, void *arg);
+char *X509_PURPOSE_get0_name(const X509_PURPOSE *xp);
+char *X509_PURPOSE_get0_sname(const X509_PURPOSE *xp);
+int X509_PURPOSE_get_trust(const X509_PURPOSE *xp);
+void X509_PURPOSE_cleanup(void);
+int X509_PURPOSE_get_id(const X509_PURPOSE *);
+
+STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x);
+STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x);
+void X509_email_free(STACK_OF(OPENSSL_STRING) *sk);
+STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x);
+/* Flags for X509_check_* functions */
+
+/*
+ * Always check subject name for host match even if subject alt names present
+ */
+# define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT 0x1
+/* Disable wildcard matching for dnsName fields and common name. */
+# define X509_CHECK_FLAG_NO_WILDCARDS 0x2
+/* Wildcards must not match a partial label. */
+# define X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS 0x4
+/* Allow (non-partial) wildcards to match multiple labels. */
+# define X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS 0x8
+/* Constraint verifier subdomain patterns to match a single labels. */
+# define X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS 0x10
+/* Never check the subject CN */
+# define X509_CHECK_FLAG_NEVER_CHECK_SUBJECT 0x20
+/*
+ * Match reference identifiers starting with "." to any sub-domain.
+ * This is a non-public flag, turned on implicitly when the subject
+ * reference identity is a DNS name.
+ */
+# define _X509_CHECK_FLAG_DOT_SUBDOMAINS 0x8000
+
+int X509_check_host(X509 *x, const char *chk, size_t chklen,
+                    unsigned int flags, char **peername);
+int X509_check_email(X509 *x, const char *chk, size_t chklen,
+                     unsigned int flags);
+int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen,
+                  unsigned int flags);
+int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags);
+
+ASN1_OCTET_STRING *a2i_IPADDRESS(const char *ipasc);
+ASN1_OCTET_STRING *a2i_IPADDRESS_NC(const char *ipasc);
+int X509V3_NAME_from_section(X509_NAME *nm, STACK_OF(CONF_VALUE) *dn_sk,
+                             unsigned long chtype);
+
+void X509_POLICY_NODE_print(BIO *out, X509_POLICY_NODE *node, int indent);
+SKM_DEFINE_STACK_OF_INTERNAL(X509_POLICY_NODE, X509_POLICY_NODE, X509_POLICY_NODE)
+#define sk_X509_POLICY_NODE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_POLICY_NODE_sk_type(sk))
+#define sk_X509_POLICY_NODE_value(sk, idx) ((X509_POLICY_NODE *)OPENSSL_sk_value(ossl_check_const_X509_POLICY_NODE_sk_type(sk), (idx)))
+#define sk_X509_POLICY_NODE_new(cmp) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new(ossl_check_X509_POLICY_NODE_compfunc_type(cmp)))
+#define sk_X509_POLICY_NODE_new_null() ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new_null())
+#define sk_X509_POLICY_NODE_new_reserve(cmp, n) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new_reserve(ossl_check_X509_POLICY_NODE_compfunc_type(cmp), (n)))
+#define sk_X509_POLICY_NODE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_POLICY_NODE_sk_type(sk), (n))
+#define sk_X509_POLICY_NODE_free(sk) OPENSSL_sk_free(ossl_check_X509_POLICY_NODE_sk_type(sk))
+#define sk_X509_POLICY_NODE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_POLICY_NODE_sk_type(sk))
+#define sk_X509_POLICY_NODE_delete(sk, i) ((X509_POLICY_NODE *)OPENSSL_sk_delete(ossl_check_X509_POLICY_NODE_sk_type(sk), (i)))
+#define sk_X509_POLICY_NODE_delete_ptr(sk, ptr) ((X509_POLICY_NODE *)OPENSSL_sk_delete_ptr(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)))
+#define sk_X509_POLICY_NODE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr))
+#define sk_X509_POLICY_NODE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr))
+#define sk_X509_POLICY_NODE_pop(sk) ((X509_POLICY_NODE *)OPENSSL_sk_pop(ossl_check_X509_POLICY_NODE_sk_type(sk)))
+#define sk_X509_POLICY_NODE_shift(sk) ((X509_POLICY_NODE *)OPENSSL_sk_shift(ossl_check_X509_POLICY_NODE_sk_type(sk)))
+#define sk_X509_POLICY_NODE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_POLICY_NODE_sk_type(sk),ossl_check_X509_POLICY_NODE_freefunc_type(freefunc))
+#define sk_X509_POLICY_NODE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr), (idx))
+#define sk_X509_POLICY_NODE_set(sk, idx, ptr) ((X509_POLICY_NODE *)OPENSSL_sk_set(ossl_check_X509_POLICY_NODE_sk_type(sk), (idx), ossl_check_X509_POLICY_NODE_type(ptr)))
+#define sk_X509_POLICY_NODE_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr))
+#define sk_X509_POLICY_NODE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr))
+#define sk_X509_POLICY_NODE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr), pnum)
+#define sk_X509_POLICY_NODE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_POLICY_NODE_sk_type(sk))
+#define sk_X509_POLICY_NODE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_POLICY_NODE_sk_type(sk))
+#define sk_X509_POLICY_NODE_dup(sk) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_dup(ossl_check_const_X509_POLICY_NODE_sk_type(sk)))
+#define sk_X509_POLICY_NODE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_copyfunc_type(copyfunc), ossl_check_X509_POLICY_NODE_freefunc_type(freefunc)))
+#define sk_X509_POLICY_NODE_set_cmp_func(sk, cmp) ((sk_X509_POLICY_NODE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_compfunc_type(cmp)))
+
+
+
+#ifndef OPENSSL_NO_RFC3779
+typedef struct ASRange_st {
+    ASN1_INTEGER *min, *max;
+} ASRange;
+
+# define ASIdOrRange_id 0
+# define ASIdOrRange_range 1
+
+typedef struct ASIdOrRange_st {
+    int type;
+    union {
+        ASN1_INTEGER *id;
+        ASRange *range;
+    } u;
+} ASIdOrRange;
+
+SKM_DEFINE_STACK_OF_INTERNAL(ASIdOrRange, ASIdOrRange, ASIdOrRange)
+#define sk_ASIdOrRange_num(sk) OPENSSL_sk_num(ossl_check_const_ASIdOrRange_sk_type(sk))
+#define sk_ASIdOrRange_value(sk, idx) ((ASIdOrRange *)OPENSSL_sk_value(ossl_check_const_ASIdOrRange_sk_type(sk), (idx)))
+#define sk_ASIdOrRange_new(cmp) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new(ossl_check_ASIdOrRange_compfunc_type(cmp)))
+#define sk_ASIdOrRange_new_null() ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new_null())
+#define sk_ASIdOrRange_new_reserve(cmp, n) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new_reserve(ossl_check_ASIdOrRange_compfunc_type(cmp), (n)))
+#define sk_ASIdOrRange_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASIdOrRange_sk_type(sk), (n))
+#define sk_ASIdOrRange_free(sk) OPENSSL_sk_free(ossl_check_ASIdOrRange_sk_type(sk))
+#define sk_ASIdOrRange_zero(sk) OPENSSL_sk_zero(ossl_check_ASIdOrRange_sk_type(sk))
+#define sk_ASIdOrRange_delete(sk, i) ((ASIdOrRange *)OPENSSL_sk_delete(ossl_check_ASIdOrRange_sk_type(sk), (i)))
+#define sk_ASIdOrRange_delete_ptr(sk, ptr) ((ASIdOrRange *)OPENSSL_sk_delete_ptr(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)))
+#define sk_ASIdOrRange_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr))
+#define sk_ASIdOrRange_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr))
+#define sk_ASIdOrRange_pop(sk) ((ASIdOrRange *)OPENSSL_sk_pop(ossl_check_ASIdOrRange_sk_type(sk)))
+#define sk_ASIdOrRange_shift(sk) ((ASIdOrRange *)OPENSSL_sk_shift(ossl_check_ASIdOrRange_sk_type(sk)))
+#define sk_ASIdOrRange_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASIdOrRange_sk_type(sk),ossl_check_ASIdOrRange_freefunc_type(freefunc))
+#define sk_ASIdOrRange_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr), (idx))
+#define sk_ASIdOrRange_set(sk, idx, ptr) ((ASIdOrRange *)OPENSSL_sk_set(ossl_check_ASIdOrRange_sk_type(sk), (idx), ossl_check_ASIdOrRange_type(ptr)))
+#define sk_ASIdOrRange_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr))
+#define sk_ASIdOrRange_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr))
+#define sk_ASIdOrRange_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr), pnum)
+#define sk_ASIdOrRange_sort(sk) OPENSSL_sk_sort(ossl_check_ASIdOrRange_sk_type(sk))
+#define sk_ASIdOrRange_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASIdOrRange_sk_type(sk))
+#define sk_ASIdOrRange_dup(sk) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_dup(ossl_check_const_ASIdOrRange_sk_type(sk)))
+#define sk_ASIdOrRange_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_deep_copy(ossl_check_const_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_copyfunc_type(copyfunc), ossl_check_ASIdOrRange_freefunc_type(freefunc)))
+#define sk_ASIdOrRange_set_cmp_func(sk, cmp) ((sk_ASIdOrRange_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_compfunc_type(cmp)))
+
+
+typedef STACK_OF(ASIdOrRange) ASIdOrRanges;
+
+# define ASIdentifierChoice_inherit 0
+# define ASIdentifierChoice_asIdsOrRanges 1
+
+typedef struct ASIdentifierChoice_st {
+    int type;
+    union {
+        ASN1_NULL *inherit;
+        ASIdOrRanges *asIdsOrRanges;
+    } u;
+} ASIdentifierChoice;
+
+typedef struct ASIdentifiers_st {
+    ASIdentifierChoice *asnum, *rdi;
+} ASIdentifiers;
+
+DECLARE_ASN1_FUNCTIONS(ASRange)
+DECLARE_ASN1_FUNCTIONS(ASIdOrRange)
+DECLARE_ASN1_FUNCTIONS(ASIdentifierChoice)
+DECLARE_ASN1_FUNCTIONS(ASIdentifiers)
+
+typedef struct IPAddressRange_st {
+    ASN1_BIT_STRING *min, *max;
+} IPAddressRange;
+
+# define IPAddressOrRange_addressPrefix 0
+# define IPAddressOrRange_addressRange 1
+
+typedef struct IPAddressOrRange_st {
+    int type;
+    union {
+        ASN1_BIT_STRING *addressPrefix;
+        IPAddressRange *addressRange;
+    } u;
+} IPAddressOrRange;
+
+SKM_DEFINE_STACK_OF_INTERNAL(IPAddressOrRange, IPAddressOrRange, IPAddressOrRange)
+#define sk_IPAddressOrRange_num(sk) OPENSSL_sk_num(ossl_check_const_IPAddressOrRange_sk_type(sk))
+#define sk_IPAddressOrRange_value(sk, idx) ((IPAddressOrRange *)OPENSSL_sk_value(ossl_check_const_IPAddressOrRange_sk_type(sk), (idx)))
+#define sk_IPAddressOrRange_new(cmp) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new(ossl_check_IPAddressOrRange_compfunc_type(cmp)))
+#define sk_IPAddressOrRange_new_null() ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new_null())
+#define sk_IPAddressOrRange_new_reserve(cmp, n) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new_reserve(ossl_check_IPAddressOrRange_compfunc_type(cmp), (n)))
+#define sk_IPAddressOrRange_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_IPAddressOrRange_sk_type(sk), (n))
+#define sk_IPAddressOrRange_free(sk) OPENSSL_sk_free(ossl_check_IPAddressOrRange_sk_type(sk))
+#define sk_IPAddressOrRange_zero(sk) OPENSSL_sk_zero(ossl_check_IPAddressOrRange_sk_type(sk))
+#define sk_IPAddressOrRange_delete(sk, i) ((IPAddressOrRange *)OPENSSL_sk_delete(ossl_check_IPAddressOrRange_sk_type(sk), (i)))
+#define sk_IPAddressOrRange_delete_ptr(sk, ptr) ((IPAddressOrRange *)OPENSSL_sk_delete_ptr(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr)))
+#define sk_IPAddressOrRange_push(sk, ptr) OPENSSL_sk_push(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr))
+#define sk_IPAddressOrRange_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr))
+#define sk_IPAddressOrRange_pop(sk) ((IPAddressOrRange *)OPENSSL_sk_pop(ossl_check_IPAddressOrRange_sk_type(sk)))
+#define sk_IPAddressOrRange_shift(sk) ((IPAddressOrRange *)OPENSSL_sk_shift(ossl_check_IPAddressOrRange_sk_type(sk)))
+#define sk_IPAddressOrRange_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_IPAddressOrRange_sk_type(sk),ossl_check_IPAddressOrRange_freefunc_type(freefunc))
+#define sk_IPAddressOrRange_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr), (idx))
+#define sk_IPAddressOrRange_set(sk, idx, ptr) ((IPAddressOrRange *)OPENSSL_sk_set(ossl_check_IPAddressOrRange_sk_type(sk), (idx), ossl_check_IPAddressOrRange_type(ptr)))
+#define sk_IPAddressOrRange_find(sk, ptr) OPENSSL_sk_find(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr))
+#define sk_IPAddressOrRange_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr))
+#define sk_IPAddressOrRange_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr), pnum)
+#define sk_IPAddressOrRange_sort(sk) OPENSSL_sk_sort(ossl_check_IPAddressOrRange_sk_type(sk))
+#define sk_IPAddressOrRange_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_IPAddressOrRange_sk_type(sk))
+#define sk_IPAddressOrRange_dup(sk) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_dup(ossl_check_const_IPAddressOrRange_sk_type(sk)))
+#define sk_IPAddressOrRange_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_deep_copy(ossl_check_const_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_copyfunc_type(copyfunc), ossl_check_IPAddressOrRange_freefunc_type(freefunc)))
+#define sk_IPAddressOrRange_set_cmp_func(sk, cmp) ((sk_IPAddressOrRange_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_compfunc_type(cmp)))
+
+
+typedef STACK_OF(IPAddressOrRange) IPAddressOrRanges;
+
+# define IPAddressChoice_inherit 0
+# define IPAddressChoice_addressesOrRanges 1
+
+typedef struct IPAddressChoice_st {
+    int type;
+    union {
+        ASN1_NULL *inherit;
+        IPAddressOrRanges *addressesOrRanges;
+    } u;
+} IPAddressChoice;
+
+typedef struct IPAddressFamily_st {
+    ASN1_OCTET_STRING *addressFamily;
+    IPAddressChoice *ipAddressChoice;
+} IPAddressFamily;
+
+SKM_DEFINE_STACK_OF_INTERNAL(IPAddressFamily, IPAddressFamily, IPAddressFamily)
+#define sk_IPAddressFamily_num(sk) OPENSSL_sk_num(ossl_check_const_IPAddressFamily_sk_type(sk))
+#define sk_IPAddressFamily_value(sk, idx) ((IPAddressFamily *)OPENSSL_sk_value(ossl_check_const_IPAddressFamily_sk_type(sk), (idx)))
+#define sk_IPAddressFamily_new(cmp) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new(ossl_check_IPAddressFamily_compfunc_type(cmp)))
+#define sk_IPAddressFamily_new_null() ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new_null())
+#define sk_IPAddressFamily_new_reserve(cmp, n) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new_reserve(ossl_check_IPAddressFamily_compfunc_type(cmp), (n)))
+#define sk_IPAddressFamily_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_IPAddressFamily_sk_type(sk), (n))
+#define sk_IPAddressFamily_free(sk) OPENSSL_sk_free(ossl_check_IPAddressFamily_sk_type(sk))
+#define sk_IPAddressFamily_zero(sk) OPENSSL_sk_zero(ossl_check_IPAddressFamily_sk_type(sk))
+#define sk_IPAddressFamily_delete(sk, i) ((IPAddressFamily *)OPENSSL_sk_delete(ossl_check_IPAddressFamily_sk_type(sk), (i)))
+#define sk_IPAddressFamily_delete_ptr(sk, ptr) ((IPAddressFamily *)OPENSSL_sk_delete_ptr(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)))
+#define sk_IPAddressFamily_push(sk, ptr) OPENSSL_sk_push(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr))
+#define sk_IPAddressFamily_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr))
+#define sk_IPAddressFamily_pop(sk) ((IPAddressFamily *)OPENSSL_sk_pop(ossl_check_IPAddressFamily_sk_type(sk)))
+#define sk_IPAddressFamily_shift(sk) ((IPAddressFamily *)OPENSSL_sk_shift(ossl_check_IPAddressFamily_sk_type(sk)))
+#define sk_IPAddressFamily_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_IPAddressFamily_sk_type(sk),ossl_check_IPAddressFamily_freefunc_type(freefunc))
+#define sk_IPAddressFamily_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr), (idx))
+#define sk_IPAddressFamily_set(sk, idx, ptr) ((IPAddressFamily *)OPENSSL_sk_set(ossl_check_IPAddressFamily_sk_type(sk), (idx), ossl_check_IPAddressFamily_type(ptr)))
+#define sk_IPAddressFamily_find(sk, ptr) OPENSSL_sk_find(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr))
+#define sk_IPAddressFamily_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr))
+#define sk_IPAddressFamily_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr), pnum)
+#define sk_IPAddressFamily_sort(sk) OPENSSL_sk_sort(ossl_check_IPAddressFamily_sk_type(sk))
+#define sk_IPAddressFamily_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_IPAddressFamily_sk_type(sk))
+#define sk_IPAddressFamily_dup(sk) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_dup(ossl_check_const_IPAddressFamily_sk_type(sk)))
+#define sk_IPAddressFamily_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_deep_copy(ossl_check_const_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_copyfunc_type(copyfunc), ossl_check_IPAddressFamily_freefunc_type(freefunc)))
+#define sk_IPAddressFamily_set_cmp_func(sk, cmp) ((sk_IPAddressFamily_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_compfunc_type(cmp)))
+
+
+
+typedef STACK_OF(IPAddressFamily) IPAddrBlocks;
+
+DECLARE_ASN1_FUNCTIONS(IPAddressRange)
+DECLARE_ASN1_FUNCTIONS(IPAddressOrRange)
+DECLARE_ASN1_FUNCTIONS(IPAddressChoice)
+DECLARE_ASN1_FUNCTIONS(IPAddressFamily)
+
+/*
+ * API tag for elements of the ASIdentifer SEQUENCE.
+ */
+# define V3_ASID_ASNUM 0
+# define V3_ASID_RDI 1
+
+/*
+ * AFI values, assigned by IANA. It'd be nice to make the AFI
+ * handling code totally generic, but there are too many little things
+ * that would need to be defined for other address families for it to
+ * be worth the trouble.
+ */
+# define IANA_AFI_IPV4 1
+# define IANA_AFI_IPV6 2
+
+/*
+ * Utilities to construct and extract values from RFC3779 extensions,
+ * since some of the encodings (particularly for IP address prefixes
+ * and ranges) are a bit tedious to work with directly.
+ */
+int X509v3_asid_add_inherit(ASIdentifiers *asid, int which);
+int X509v3_asid_add_id_or_range(ASIdentifiers *asid, int which,
+                                ASN1_INTEGER *min, ASN1_INTEGER *max);
+int X509v3_addr_add_inherit(IPAddrBlocks *addr,
+                            const unsigned afi, const unsigned *safi);
+int X509v3_addr_add_prefix(IPAddrBlocks *addr,
+                           const unsigned afi, const unsigned *safi,
+                           unsigned char *a, const int prefixlen);
+int X509v3_addr_add_range(IPAddrBlocks *addr,
+                          const unsigned afi, const unsigned *safi,
+                          unsigned char *min, unsigned char *max);
+unsigned X509v3_addr_get_afi(const IPAddressFamily *f);
+int X509v3_addr_get_range(IPAddressOrRange *aor, const unsigned afi,
+                          unsigned char *min, unsigned char *max,
+                          const int length);
+
+/*
+ * Canonical forms.
+ */
+int X509v3_asid_is_canonical(ASIdentifiers *asid);
+int X509v3_addr_is_canonical(IPAddrBlocks *addr);
+int X509v3_asid_canonize(ASIdentifiers *asid);
+int X509v3_addr_canonize(IPAddrBlocks *addr);
+
+/*
+ * Tests for inheritance and containment.
+ */
+int X509v3_asid_inherits(ASIdentifiers *asid);
+int X509v3_addr_inherits(IPAddrBlocks *addr);
+int X509v3_asid_subset(ASIdentifiers *a, ASIdentifiers *b);
+int X509v3_addr_subset(IPAddrBlocks *a, IPAddrBlocks *b);
+
+/*
+ * Check whether RFC 3779 extensions nest properly in chains.
+ */
+int X509v3_asid_validate_path(X509_STORE_CTX *);
+int X509v3_addr_validate_path(X509_STORE_CTX *);
+int X509v3_asid_validate_resource_set(STACK_OF(X509) *chain,
+                                      ASIdentifiers *ext,
+                                      int allow_inheritance);
+int X509v3_addr_validate_resource_set(STACK_OF(X509) *chain,
+                                      IPAddrBlocks *ext, int allow_inheritance);
+
+#endif /* OPENSSL_NO_RFC3779 */
+
+SKM_DEFINE_STACK_OF_INTERNAL(ASN1_STRING, ASN1_STRING, ASN1_STRING)
+#define sk_ASN1_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_STRING_sk_type(sk))
+#define sk_ASN1_STRING_value(sk, idx) ((ASN1_STRING *)OPENSSL_sk_value(ossl_check_const_ASN1_STRING_sk_type(sk), (idx)))
+#define sk_ASN1_STRING_new(cmp) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new(ossl_check_ASN1_STRING_compfunc_type(cmp)))
+#define sk_ASN1_STRING_new_null() ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new_null())
+#define sk_ASN1_STRING_new_reserve(cmp, n) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_STRING_compfunc_type(cmp), (n)))
+#define sk_ASN1_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_STRING_sk_type(sk), (n))
+#define sk_ASN1_STRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_STRING_sk_type(sk))
+#define sk_ASN1_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_STRING_sk_type(sk))
+#define sk_ASN1_STRING_delete(sk, i) ((ASN1_STRING *)OPENSSL_sk_delete(ossl_check_ASN1_STRING_sk_type(sk), (i)))
+#define sk_ASN1_STRING_delete_ptr(sk, ptr) ((ASN1_STRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)))
+#define sk_ASN1_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr))
+#define sk_ASN1_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr))
+#define sk_ASN1_STRING_pop(sk) ((ASN1_STRING *)OPENSSL_sk_pop(ossl_check_ASN1_STRING_sk_type(sk)))
+#define sk_ASN1_STRING_shift(sk) ((ASN1_STRING *)OPENSSL_sk_shift(ossl_check_ASN1_STRING_sk_type(sk)))
+#define sk_ASN1_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_STRING_sk_type(sk),ossl_check_ASN1_STRING_freefunc_type(freefunc))
+#define sk_ASN1_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr), (idx))
+#define sk_ASN1_STRING_set(sk, idx, ptr) ((ASN1_STRING *)OPENSSL_sk_set(ossl_check_ASN1_STRING_sk_type(sk), (idx), ossl_check_ASN1_STRING_type(ptr)))
+#define sk_ASN1_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr))
+#define sk_ASN1_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr))
+#define sk_ASN1_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr), pnum)
+#define sk_ASN1_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_STRING_sk_type(sk))
+#define sk_ASN1_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_STRING_sk_type(sk))
+#define sk_ASN1_STRING_dup(sk) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_STRING_sk_type(sk)))
+#define sk_ASN1_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_copyfunc_type(copyfunc), ossl_check_ASN1_STRING_freefunc_type(freefunc)))
+#define sk_ASN1_STRING_set_cmp_func(sk, cmp) ((sk_ASN1_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_compfunc_type(cmp)))
+
+
+/*
+ * Admission Syntax
+ */
+typedef struct NamingAuthority_st NAMING_AUTHORITY;
+typedef struct ProfessionInfo_st PROFESSION_INFO;
+typedef struct Admissions_st ADMISSIONS;
+typedef struct AdmissionSyntax_st ADMISSION_SYNTAX;
+DECLARE_ASN1_FUNCTIONS(NAMING_AUTHORITY)
+DECLARE_ASN1_FUNCTIONS(PROFESSION_INFO)
+DECLARE_ASN1_FUNCTIONS(ADMISSIONS)
+DECLARE_ASN1_FUNCTIONS(ADMISSION_SYNTAX)
+SKM_DEFINE_STACK_OF_INTERNAL(PROFESSION_INFO, PROFESSION_INFO, PROFESSION_INFO)
+#define sk_PROFESSION_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PROFESSION_INFO_sk_type(sk))
+#define sk_PROFESSION_INFO_value(sk, idx) ((PROFESSION_INFO *)OPENSSL_sk_value(ossl_check_const_PROFESSION_INFO_sk_type(sk), (idx)))
+#define sk_PROFESSION_INFO_new(cmp) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new(ossl_check_PROFESSION_INFO_compfunc_type(cmp)))
+#define sk_PROFESSION_INFO_new_null() ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new_null())
+#define sk_PROFESSION_INFO_new_reserve(cmp, n) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PROFESSION_INFO_compfunc_type(cmp), (n)))
+#define sk_PROFESSION_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PROFESSION_INFO_sk_type(sk), (n))
+#define sk_PROFESSION_INFO_free(sk) OPENSSL_sk_free(ossl_check_PROFESSION_INFO_sk_type(sk))
+#define sk_PROFESSION_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PROFESSION_INFO_sk_type(sk))
+#define sk_PROFESSION_INFO_delete(sk, i) ((PROFESSION_INFO *)OPENSSL_sk_delete(ossl_check_PROFESSION_INFO_sk_type(sk), (i)))
+#define sk_PROFESSION_INFO_delete_ptr(sk, ptr) ((PROFESSION_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)))
+#define sk_PROFESSION_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr))
+#define sk_PROFESSION_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr))
+#define sk_PROFESSION_INFO_pop(sk) ((PROFESSION_INFO *)OPENSSL_sk_pop(ossl_check_PROFESSION_INFO_sk_type(sk)))
+#define sk_PROFESSION_INFO_shift(sk) ((PROFESSION_INFO *)OPENSSL_sk_shift(ossl_check_PROFESSION_INFO_sk_type(sk)))
+#define sk_PROFESSION_INFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_PROFESSION_INFO_sk_type(sk),ossl_check_PROFESSION_INFO_freefunc_type(freefunc))
+#define sk_PROFESSION_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr), (idx))
+#define sk_PROFESSION_INFO_set(sk, idx, ptr) ((PROFESSION_INFO *)OPENSSL_sk_set(ossl_check_PROFESSION_INFO_sk_type(sk), (idx), ossl_check_PROFESSION_INFO_type(ptr)))
+#define sk_PROFESSION_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr))
+#define sk_PROFESSION_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr))
+#define sk_PROFESSION_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr), pnum)
+#define sk_PROFESSION_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PROFESSION_INFO_sk_type(sk))
+#define sk_PROFESSION_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PROFESSION_INFO_sk_type(sk))
+#define sk_PROFESSION_INFO_dup(sk) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_dup(ossl_check_const_PROFESSION_INFO_sk_type(sk)))
+#define sk_PROFESSION_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_copyfunc_type(copyfunc), ossl_check_PROFESSION_INFO_freefunc_type(freefunc)))
+#define sk_PROFESSION_INFO_set_cmp_func(sk, cmp) ((sk_PROFESSION_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_compfunc_type(cmp)))
+SKM_DEFINE_STACK_OF_INTERNAL(ADMISSIONS, ADMISSIONS, ADMISSIONS)
+#define sk_ADMISSIONS_num(sk) OPENSSL_sk_num(ossl_check_const_ADMISSIONS_sk_type(sk))
+#define sk_ADMISSIONS_value(sk, idx) ((ADMISSIONS *)OPENSSL_sk_value(ossl_check_const_ADMISSIONS_sk_type(sk), (idx)))
+#define sk_ADMISSIONS_new(cmp) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new(ossl_check_ADMISSIONS_compfunc_type(cmp)))
+#define sk_ADMISSIONS_new_null() ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new_null())
+#define sk_ADMISSIONS_new_reserve(cmp, n) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new_reserve(ossl_check_ADMISSIONS_compfunc_type(cmp), (n)))
+#define sk_ADMISSIONS_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ADMISSIONS_sk_type(sk), (n))
+#define sk_ADMISSIONS_free(sk) OPENSSL_sk_free(ossl_check_ADMISSIONS_sk_type(sk))
+#define sk_ADMISSIONS_zero(sk) OPENSSL_sk_zero(ossl_check_ADMISSIONS_sk_type(sk))
+#define sk_ADMISSIONS_delete(sk, i) ((ADMISSIONS *)OPENSSL_sk_delete(ossl_check_ADMISSIONS_sk_type(sk), (i)))
+#define sk_ADMISSIONS_delete_ptr(sk, ptr) ((ADMISSIONS *)OPENSSL_sk_delete_ptr(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)))
+#define sk_ADMISSIONS_push(sk, ptr) OPENSSL_sk_push(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr))
+#define sk_ADMISSIONS_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr))
+#define sk_ADMISSIONS_pop(sk) ((ADMISSIONS *)OPENSSL_sk_pop(ossl_check_ADMISSIONS_sk_type(sk)))
+#define sk_ADMISSIONS_shift(sk) ((ADMISSIONS *)OPENSSL_sk_shift(ossl_check_ADMISSIONS_sk_type(sk)))
+#define sk_ADMISSIONS_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ADMISSIONS_sk_type(sk),ossl_check_ADMISSIONS_freefunc_type(freefunc))
+#define sk_ADMISSIONS_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr), (idx))
+#define sk_ADMISSIONS_set(sk, idx, ptr) ((ADMISSIONS *)OPENSSL_sk_set(ossl_check_ADMISSIONS_sk_type(sk), (idx), ossl_check_ADMISSIONS_type(ptr)))
+#define sk_ADMISSIONS_find(sk, ptr) OPENSSL_sk_find(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr))
+#define sk_ADMISSIONS_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr))
+#define sk_ADMISSIONS_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr), pnum)
+#define sk_ADMISSIONS_sort(sk) OPENSSL_sk_sort(ossl_check_ADMISSIONS_sk_type(sk))
+#define sk_ADMISSIONS_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ADMISSIONS_sk_type(sk))
+#define sk_ADMISSIONS_dup(sk) ((STACK_OF(ADMISSIONS)
*)OPENSSL_sk_dup(ossl_check_const_ADMISSIONS_sk_type(sk))) +#define sk_ADMISSIONS_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_deep_copy(ossl_check_const_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_copyfunc_type(copyfunc), ossl_check_ADMISSIONS_freefunc_type(freefunc))) +#define sk_ADMISSIONS_set_cmp_func(sk, cmp) ((sk_ADMISSIONS_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_compfunc_type(cmp))) + +typedef STACK_OF(PROFESSION_INFO) PROFESSION_INFOS; + +const ASN1_OBJECT *NAMING_AUTHORITY_get0_authorityId( + const NAMING_AUTHORITY *n); +const ASN1_IA5STRING *NAMING_AUTHORITY_get0_authorityURL( + const NAMING_AUTHORITY *n); +const ASN1_STRING *NAMING_AUTHORITY_get0_authorityText( + const NAMING_AUTHORITY *n); +void NAMING_AUTHORITY_set0_authorityId(NAMING_AUTHORITY *n, + ASN1_OBJECT* namingAuthorityId); +void NAMING_AUTHORITY_set0_authorityURL(NAMING_AUTHORITY *n, + ASN1_IA5STRING* namingAuthorityUrl); +void NAMING_AUTHORITY_set0_authorityText(NAMING_AUTHORITY *n, + ASN1_STRING* namingAuthorityText); + +const GENERAL_NAME *ADMISSION_SYNTAX_get0_admissionAuthority( + const ADMISSION_SYNTAX *as); +void ADMISSION_SYNTAX_set0_admissionAuthority( + ADMISSION_SYNTAX *as, GENERAL_NAME *aa); +const STACK_OF(ADMISSIONS) *ADMISSION_SYNTAX_get0_contentsOfAdmissions( + const ADMISSION_SYNTAX *as); +void ADMISSION_SYNTAX_set0_contentsOfAdmissions( + ADMISSION_SYNTAX *as, STACK_OF(ADMISSIONS) *a); +const GENERAL_NAME *ADMISSIONS_get0_admissionAuthority(const ADMISSIONS *a); +void ADMISSIONS_set0_admissionAuthority(ADMISSIONS *a, GENERAL_NAME *aa); +const NAMING_AUTHORITY *ADMISSIONS_get0_namingAuthority(const ADMISSIONS *a); +void ADMISSIONS_set0_namingAuthority(ADMISSIONS *a, NAMING_AUTHORITY *na); +const PROFESSION_INFOS *ADMISSIONS_get0_professionInfos(const ADMISSIONS *a); +void ADMISSIONS_set0_professionInfos(ADMISSIONS *a, PROFESSION_INFOS *pi); +const ASN1_OCTET_STRING *PROFESSION_INFO_get0_addProfessionInfo( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_addProfessionInfo( + PROFESSION_INFO *pi, ASN1_OCTET_STRING *aos); +const NAMING_AUTHORITY *PROFESSION_INFO_get0_namingAuthority( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_namingAuthority( + PROFESSION_INFO *pi, NAMING_AUTHORITY *na); +const STACK_OF(ASN1_STRING) *PROFESSION_INFO_get0_professionItems( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_professionItems( + PROFESSION_INFO *pi, STACK_OF(ASN1_STRING) *as); +const STACK_OF(ASN1_OBJECT) *PROFESSION_INFO_get0_professionOIDs( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_professionOIDs( + PROFESSION_INFO *pi, STACK_OF(ASN1_OBJECT) *po); +const ASN1_PRINTABLESTRING *PROFESSION_INFO_get0_registrationNumber( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_registrationNumber( + PROFESSION_INFO *pi, ASN1_PRINTABLESTRING *rn); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_digests.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_digests.h new file mode 100644 index 00000000000..b184807c80c --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_digests.h @@ -0,0 +1,160 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_digests.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). 
You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * sigAlgs OBJECT IDENTIFIER ::= { nistAlgorithms 3 } + */ +#define DER_OID_V_sigAlgs DER_P_OBJECT, 8, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03 +#define DER_OID_SZ_sigAlgs 10 +extern const unsigned char ossl_der_oid_sigAlgs[DER_OID_SZ_sigAlgs]; + +/* + * id-sha1 OBJECT IDENTIFIER ::= { iso(1) + * identified-organization(3) oiw(14) + * secsig(3) algorithms(2) 26 } + */ +#define DER_OID_V_id_sha1 DER_P_OBJECT, 5, 0x2B, 0x0E, 0x03, 0x02, 0x1A +#define DER_OID_SZ_id_sha1 7 +extern const unsigned char ossl_der_oid_id_sha1[DER_OID_SZ_id_sha1]; + +/* + * id-md2 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2) 2 } + */ +#define DER_OID_V_id_md2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x02 +#define DER_OID_SZ_id_md2 10 +extern const unsigned char ossl_der_oid_id_md2[DER_OID_SZ_id_md2]; + +/* + * id-md5 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2) 5 } + */ +#define DER_OID_V_id_md5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05 +#define DER_OID_SZ_id_md5 10 +extern const unsigned char ossl_der_oid_id_md5[DER_OID_SZ_id_md5]; + +/* + * id-sha256 OBJECT IDENTIFIER ::= { hashAlgs 1 } + */ +#define DER_OID_V_id_sha256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01 +#define DER_OID_SZ_id_sha256 11 +extern const unsigned char ossl_der_oid_id_sha256[DER_OID_SZ_id_sha256]; + +/* + * id-sha384 OBJECT IDENTIFIER ::= { hashAlgs 2 } + */ +#define DER_OID_V_id_sha384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02 +#define DER_OID_SZ_id_sha384 11 +extern const unsigned char ossl_der_oid_id_sha384[DER_OID_SZ_id_sha384]; + +/* + * id-sha512 OBJECT IDENTIFIER ::= { hashAlgs 3 } + */ +#define DER_OID_V_id_sha512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03 +#define DER_OID_SZ_id_sha512 11 +extern const unsigned char ossl_der_oid_id_sha512[DER_OID_SZ_id_sha512]; + +/* + * id-sha224 OBJECT IDENTIFIER ::= { hashAlgs 4 } + */ +#define DER_OID_V_id_sha224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04 +#define DER_OID_SZ_id_sha224 11 +extern const unsigned char ossl_der_oid_id_sha224[DER_OID_SZ_id_sha224]; + +/* + * id-sha512-224 OBJECT IDENTIFIER ::= { hashAlgs 5 } + */ +#define DER_OID_V_id_sha512_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x05 +#define DER_OID_SZ_id_sha512_224 11 +extern const unsigned char ossl_der_oid_id_sha512_224[DER_OID_SZ_id_sha512_224]; + +/* + * id-sha512-256 OBJECT IDENTIFIER ::= { hashAlgs 6 } + */ +#define DER_OID_V_id_sha512_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x06 +#define DER_OID_SZ_id_sha512_256 11 +extern const unsigned char ossl_der_oid_id_sha512_256[DER_OID_SZ_id_sha512_256]; + +/* + * id-sha3-224 OBJECT IDENTIFIER ::= { hashAlgs 7 } + */ +#define DER_OID_V_id_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x07 +#define DER_OID_SZ_id_sha3_224 11 +extern const unsigned char ossl_der_oid_id_sha3_224[DER_OID_SZ_id_sha3_224]; + +/* + * id-sha3-256 OBJECT IDENTIFIER ::= { hashAlgs 8 } + */ +#define DER_OID_V_id_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x08 +#define 
DER_OID_SZ_id_sha3_256 11 +extern const unsigned char ossl_der_oid_id_sha3_256[DER_OID_SZ_id_sha3_256]; + +/* + * id-sha3-384 OBJECT IDENTIFIER ::= { hashAlgs 9 } + */ +#define DER_OID_V_id_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x09 +#define DER_OID_SZ_id_sha3_384 11 +extern const unsigned char ossl_der_oid_id_sha3_384[DER_OID_SZ_id_sha3_384]; + +/* + * id-sha3-512 OBJECT IDENTIFIER ::= { hashAlgs 10 } + */ +#define DER_OID_V_id_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0A +#define DER_OID_SZ_id_sha3_512 11 +extern const unsigned char ossl_der_oid_id_sha3_512[DER_OID_SZ_id_sha3_512]; + +/* + * id-shake128 OBJECT IDENTIFIER ::= { hashAlgs 11 } + */ +#define DER_OID_V_id_shake128 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0B +#define DER_OID_SZ_id_shake128 11 +extern const unsigned char ossl_der_oid_id_shake128[DER_OID_SZ_id_shake128]; + +/* + * id-shake256 OBJECT IDENTIFIER ::= { hashAlgs 12 } + */ +#define DER_OID_V_id_shake256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0C +#define DER_OID_SZ_id_shake256 11 +extern const unsigned char ossl_der_oid_id_shake256[DER_OID_SZ_id_shake256]; + +/* + * id-shake128-len OBJECT IDENTIFIER ::= { hashAlgs 17 } + */ +#define DER_OID_V_id_shake128_len DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x11 +#define DER_OID_SZ_id_shake128_len 11 +extern const unsigned char ossl_der_oid_id_shake128_len[DER_OID_SZ_id_shake128_len]; + +/* + * id-shake256-len OBJECT IDENTIFIER ::= { hashAlgs 18 } + */ +#define DER_OID_V_id_shake256_len DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x12 +#define DER_OID_SZ_id_shake256_len 11 +extern const unsigned char ossl_der_oid_id_shake256_len[DER_OID_SZ_id_shake256_len]; + +/* + * id-KMACWithSHAKE128 OBJECT IDENTIFIER ::={hashAlgs 19} + */ +#define DER_OID_V_id_KMACWithSHAKE128 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x13 +#define DER_OID_SZ_id_KMACWithSHAKE128 11 +extern const unsigned char ossl_der_oid_id_KMACWithSHAKE128[DER_OID_SZ_id_KMACWithSHAKE128]; + +/* + * id-KMACWithSHAKE256 OBJECT IDENTIFIER ::={ hashAlgs 20} + */ +#define DER_OID_V_id_KMACWithSHAKE256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x14 +#define DER_OID_SZ_id_KMACWithSHAKE256 11 +extern const unsigned char ossl_der_oid_id_KMACWithSHAKE256[DER_OID_SZ_id_KMACWithSHAKE256]; + diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_dsa.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_dsa.h new file mode 100644 index 00000000000..b12a56282b2 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_dsa.h @@ -0,0 +1,94 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_dsa.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * id-dsa OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) x9-57(10040) x9algorithm(4) 1 } + */ +#define DER_OID_V_id_dsa DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x38, 0x04, 0x01 +#define DER_OID_SZ_id_dsa 9 +extern const unsigned char ossl_der_oid_id_dsa[DER_OID_SZ_id_dsa]; + +/* + * id-dsa-with-sha1 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) x9-57 (10040) x9algorithm(4) 3 } + */ +#define DER_OID_V_id_dsa_with_sha1 DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x38, 0x04, 0x03 +#define DER_OID_SZ_id_dsa_with_sha1 9 +extern const unsigned char ossl_der_oid_id_dsa_with_sha1[DER_OID_SZ_id_dsa_with_sha1]; + +/* + * id-dsa-with-sha224 OBJECT IDENTIFIER ::= { sigAlgs 1 } + */ +#define DER_OID_V_id_dsa_with_sha224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x01 +#define DER_OID_SZ_id_dsa_with_sha224 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha224[DER_OID_SZ_id_dsa_with_sha224]; + +/* + * id-dsa-with-sha256 OBJECT IDENTIFIER ::= { sigAlgs 2 } + */ +#define DER_OID_V_id_dsa_with_sha256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x02 +#define DER_OID_SZ_id_dsa_with_sha256 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha256[DER_OID_SZ_id_dsa_with_sha256]; + +/* + * id-dsa-with-sha384 OBJECT IDENTIFIER ::= { sigAlgs 3 } + */ +#define DER_OID_V_id_dsa_with_sha384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x03 +#define DER_OID_SZ_id_dsa_with_sha384 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha384[DER_OID_SZ_id_dsa_with_sha384]; + +/* + * id-dsa-with-sha512 OBJECT IDENTIFIER ::= { sigAlgs 4 } + */ +#define DER_OID_V_id_dsa_with_sha512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x04 +#define DER_OID_SZ_id_dsa_with_sha512 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha512[DER_OID_SZ_id_dsa_with_sha512]; + +/* + * id-dsa-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 5 } + */ +#define DER_OID_V_id_dsa_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x05 +#define DER_OID_SZ_id_dsa_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_224[DER_OID_SZ_id_dsa_with_sha3_224]; + +/* + * id-dsa-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 6 } + */ +#define DER_OID_V_id_dsa_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x06 +#define DER_OID_SZ_id_dsa_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_256[DER_OID_SZ_id_dsa_with_sha3_256]; + +/* + * id-dsa-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 7 } + */ +#define DER_OID_V_id_dsa_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x07 +#define DER_OID_SZ_id_dsa_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_384[DER_OID_SZ_id_dsa_with_sha3_384]; + +/* + * id-dsa-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 8 } + */ +#define DER_OID_V_id_dsa_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x08 +#define DER_OID_SZ_id_dsa_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_512[DER_OID_SZ_id_dsa_with_sha3_512]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_DSA(WPACKET *pkt, int tag, DSA *dsa); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_DSA_with_MD(WPACKET 
*pkt, int tag, + DSA *dsa, int mdnid); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ec.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ec.h new file mode 100644 index 00000000000..dd697771f71 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ec.h @@ -0,0 +1,286 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_ec.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/ec.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * ecdsa-with-SHA1 OBJECT IDENTIFIER ::= { id-ecSigType 1 } + */ +#define DER_OID_V_ecdsa_with_SHA1 DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x01 +#define DER_OID_SZ_ecdsa_with_SHA1 9 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA1[DER_OID_SZ_ecdsa_with_SHA1]; + +/* + * id-ecPublicKey OBJECT IDENTIFIER ::= { id-publicKeyType 1 } + */ +#define DER_OID_V_id_ecPublicKey DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01 +#define DER_OID_SZ_id_ecPublicKey 9 +extern const unsigned char ossl_der_oid_id_ecPublicKey[DER_OID_SZ_id_ecPublicKey]; + +/* + * c2pnb163v1 OBJECT IDENTIFIER ::= { c-TwoCurve 1 } + */ +#define DER_OID_V_c2pnb163v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x01 +#define DER_OID_SZ_c2pnb163v1 10 +extern const unsigned char ossl_der_oid_c2pnb163v1[DER_OID_SZ_c2pnb163v1]; + +/* + * c2pnb163v2 OBJECT IDENTIFIER ::= { c-TwoCurve 2 } + */ +#define DER_OID_V_c2pnb163v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x02 +#define DER_OID_SZ_c2pnb163v2 10 +extern const unsigned char ossl_der_oid_c2pnb163v2[DER_OID_SZ_c2pnb163v2]; + +/* + * c2pnb163v3 OBJECT IDENTIFIER ::= { c-TwoCurve 3 } + */ +#define DER_OID_V_c2pnb163v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x03 +#define DER_OID_SZ_c2pnb163v3 10 +extern const unsigned char ossl_der_oid_c2pnb163v3[DER_OID_SZ_c2pnb163v3]; + +/* + * c2pnb176w1 OBJECT IDENTIFIER ::= { c-TwoCurve 4 } + */ +#define DER_OID_V_c2pnb176w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x04 +#define DER_OID_SZ_c2pnb176w1 10 +extern const unsigned char ossl_der_oid_c2pnb176w1[DER_OID_SZ_c2pnb176w1]; + +/* + * c2tnb191v1 OBJECT IDENTIFIER ::= { c-TwoCurve 5 } + */ +#define DER_OID_V_c2tnb191v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x05 +#define DER_OID_SZ_c2tnb191v1 10 +extern const unsigned char ossl_der_oid_c2tnb191v1[DER_OID_SZ_c2tnb191v1]; + +/* + * c2tnb191v2 OBJECT IDENTIFIER ::= { c-TwoCurve 6 } + */ +#define DER_OID_V_c2tnb191v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x06 +#define DER_OID_SZ_c2tnb191v2 10 +extern const unsigned char ossl_der_oid_c2tnb191v2[DER_OID_SZ_c2tnb191v2]; + +/* + * c2tnb191v3 OBJECT IDENTIFIER ::= { c-TwoCurve 7 } + */ +#define DER_OID_V_c2tnb191v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x07 +#define DER_OID_SZ_c2tnb191v3 10 +extern const unsigned char ossl_der_oid_c2tnb191v3[DER_OID_SZ_c2tnb191v3]; + +/* + * c2onb191v4 OBJECT IDENTIFIER ::= { c-TwoCurve 8 } + */ +#define DER_OID_V_c2onb191v4 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x08 +#define DER_OID_SZ_c2onb191v4 10 +extern const unsigned char 
ossl_der_oid_c2onb191v4[DER_OID_SZ_c2onb191v4]; + +/* + * c2onb191v5 OBJECT IDENTIFIER ::= { c-TwoCurve 9 } + */ +#define DER_OID_V_c2onb191v5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x09 +#define DER_OID_SZ_c2onb191v5 10 +extern const unsigned char ossl_der_oid_c2onb191v5[DER_OID_SZ_c2onb191v5]; + +/* + * c2pnb208w1 OBJECT IDENTIFIER ::= { c-TwoCurve 10 } + */ +#define DER_OID_V_c2pnb208w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0A +#define DER_OID_SZ_c2pnb208w1 10 +extern const unsigned char ossl_der_oid_c2pnb208w1[DER_OID_SZ_c2pnb208w1]; + +/* + * c2tnb239v1 OBJECT IDENTIFIER ::= { c-TwoCurve 11 } + */ +#define DER_OID_V_c2tnb239v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0B +#define DER_OID_SZ_c2tnb239v1 10 +extern const unsigned char ossl_der_oid_c2tnb239v1[DER_OID_SZ_c2tnb239v1]; + +/* + * c2tnb239v2 OBJECT IDENTIFIER ::= { c-TwoCurve 12 } + */ +#define DER_OID_V_c2tnb239v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0C +#define DER_OID_SZ_c2tnb239v2 10 +extern const unsigned char ossl_der_oid_c2tnb239v2[DER_OID_SZ_c2tnb239v2]; + +/* + * c2tnb239v3 OBJECT IDENTIFIER ::= { c-TwoCurve 13 } + */ +#define DER_OID_V_c2tnb239v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0D +#define DER_OID_SZ_c2tnb239v3 10 +extern const unsigned char ossl_der_oid_c2tnb239v3[DER_OID_SZ_c2tnb239v3]; + +/* + * c2onb239v4 OBJECT IDENTIFIER ::= { c-TwoCurve 14 } + */ +#define DER_OID_V_c2onb239v4 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0E +#define DER_OID_SZ_c2onb239v4 10 +extern const unsigned char ossl_der_oid_c2onb239v4[DER_OID_SZ_c2onb239v4]; + +/* + * c2onb239v5 OBJECT IDENTIFIER ::= { c-TwoCurve 15 } + */ +#define DER_OID_V_c2onb239v5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0F +#define DER_OID_SZ_c2onb239v5 10 +extern const unsigned char ossl_der_oid_c2onb239v5[DER_OID_SZ_c2onb239v5]; + +/* + * c2pnb272w1 OBJECT IDENTIFIER ::= { c-TwoCurve 16 } + */ +#define DER_OID_V_c2pnb272w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x10 +#define DER_OID_SZ_c2pnb272w1 10 +extern const unsigned char ossl_der_oid_c2pnb272w1[DER_OID_SZ_c2pnb272w1]; + +/* + * c2pnb304w1 OBJECT IDENTIFIER ::= { c-TwoCurve 17 } + */ +#define DER_OID_V_c2pnb304w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x11 +#define DER_OID_SZ_c2pnb304w1 10 +extern const unsigned char ossl_der_oid_c2pnb304w1[DER_OID_SZ_c2pnb304w1]; + +/* + * c2tnb359v1 OBJECT IDENTIFIER ::= { c-TwoCurve 18 } + */ +#define DER_OID_V_c2tnb359v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x12 +#define DER_OID_SZ_c2tnb359v1 10 +extern const unsigned char ossl_der_oid_c2tnb359v1[DER_OID_SZ_c2tnb359v1]; + +/* + * c2pnb368w1 OBJECT IDENTIFIER ::= { c-TwoCurve 19 } + */ +#define DER_OID_V_c2pnb368w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x13 +#define DER_OID_SZ_c2pnb368w1 10 +extern const unsigned char ossl_der_oid_c2pnb368w1[DER_OID_SZ_c2pnb368w1]; + +/* + * c2tnb431r1 OBJECT IDENTIFIER ::= { c-TwoCurve 20 } + */ +#define DER_OID_V_c2tnb431r1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x14 +#define DER_OID_SZ_c2tnb431r1 10 +extern const unsigned char ossl_der_oid_c2tnb431r1[DER_OID_SZ_c2tnb431r1]; + +/* + * prime192v1 OBJECT IDENTIFIER ::= { primeCurve 1 } + */ +#define DER_OID_V_prime192v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x01 +#define DER_OID_SZ_prime192v1 10 +extern const unsigned char 
ossl_der_oid_prime192v1[DER_OID_SZ_prime192v1]; + +/* + * prime192v2 OBJECT IDENTIFIER ::= { primeCurve 2 } + */ +#define DER_OID_V_prime192v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x02 +#define DER_OID_SZ_prime192v2 10 +extern const unsigned char ossl_der_oid_prime192v2[DER_OID_SZ_prime192v2]; + +/* + * prime192v3 OBJECT IDENTIFIER ::= { primeCurve 3 } + */ +#define DER_OID_V_prime192v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x03 +#define DER_OID_SZ_prime192v3 10 +extern const unsigned char ossl_der_oid_prime192v3[DER_OID_SZ_prime192v3]; + +/* + * prime239v1 OBJECT IDENTIFIER ::= { primeCurve 4 } + */ +#define DER_OID_V_prime239v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x04 +#define DER_OID_SZ_prime239v1 10 +extern const unsigned char ossl_der_oid_prime239v1[DER_OID_SZ_prime239v1]; + +/* + * prime239v2 OBJECT IDENTIFIER ::= { primeCurve 5 } + */ +#define DER_OID_V_prime239v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x05 +#define DER_OID_SZ_prime239v2 10 +extern const unsigned char ossl_der_oid_prime239v2[DER_OID_SZ_prime239v2]; + +/* + * prime239v3 OBJECT IDENTIFIER ::= { primeCurve 6 } + */ +#define DER_OID_V_prime239v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x06 +#define DER_OID_SZ_prime239v3 10 +extern const unsigned char ossl_der_oid_prime239v3[DER_OID_SZ_prime239v3]; + +/* + * prime256v1 OBJECT IDENTIFIER ::= { primeCurve 7 } + */ +#define DER_OID_V_prime256v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07 +#define DER_OID_SZ_prime256v1 10 +extern const unsigned char ossl_der_oid_prime256v1[DER_OID_SZ_prime256v1]; + +/* + * ecdsa-with-SHA224 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 1 } + */ +#define DER_OID_V_ecdsa_with_SHA224 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01 +#define DER_OID_SZ_ecdsa_with_SHA224 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA224[DER_OID_SZ_ecdsa_with_SHA224]; + +/* + * ecdsa-with-SHA256 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 2 } + */ +#define DER_OID_V_ecdsa_with_SHA256 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02 +#define DER_OID_SZ_ecdsa_with_SHA256 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA256[DER_OID_SZ_ecdsa_with_SHA256]; + +/* + * ecdsa-with-SHA384 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 3 } + */ +#define DER_OID_V_ecdsa_with_SHA384 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x03 +#define DER_OID_SZ_ecdsa_with_SHA384 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA384[DER_OID_SZ_ecdsa_with_SHA384]; + +/* + * ecdsa-with-SHA512 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 4 } + */ +#define DER_OID_V_ecdsa_with_SHA512 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x04 +#define DER_OID_SZ_ecdsa_with_SHA512 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA512[DER_OID_SZ_ecdsa_with_SHA512]; + +/* + * id-ecdsa-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 9 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x09 +#define DER_OID_SZ_id_ecdsa_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_224[DER_OID_SZ_id_ecdsa_with_sha3_224]; + +/* + * id-ecdsa-with-sha3-256 OBJECT 
IDENTIFIER ::= { sigAlgs 10 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0A +#define DER_OID_SZ_id_ecdsa_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_256[DER_OID_SZ_id_ecdsa_with_sha3_256]; + +/* + * id-ecdsa-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 11 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0B +#define DER_OID_SZ_id_ecdsa_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_384[DER_OID_SZ_id_ecdsa_with_sha3_384]; + +/* + * id-ecdsa-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 12 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0C +#define DER_OID_SZ_id_ecdsa_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_512[DER_OID_SZ_id_ecdsa_with_sha3_512]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_EC(WPACKET *pkt, int cont, EC_KEY *ec); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_ECDSA_with_MD(WPACKET *pkt, int cont, + EC_KEY *ec, int mdnid); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ecx.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ecx.h new file mode 100644 index 00000000000..fc85738055b --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_ecx.h @@ -0,0 +1,50 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_ecx.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" +#include "crypto/ecx.h" + +/* Well known OIDs precompiled */ + +/* + * id-X25519 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 110 } + */ +#define DER_OID_V_id_X25519 DER_P_OBJECT, 3, 0x2B, 0x65, 0x6E +#define DER_OID_SZ_id_X25519 5 +extern const unsigned char ossl_der_oid_id_X25519[DER_OID_SZ_id_X25519]; + +/* + * id-X448 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 111 } + */ +#define DER_OID_V_id_X448 DER_P_OBJECT, 3, 0x2B, 0x65, 0x6F +#define DER_OID_SZ_id_X448 5 +extern const unsigned char ossl_der_oid_id_X448[DER_OID_SZ_id_X448]; + +/* + * id-Ed25519 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 112 } + */ +#define DER_OID_V_id_Ed25519 DER_P_OBJECT, 3, 0x2B, 0x65, 0x70 +#define DER_OID_SZ_id_Ed25519 5 +extern const unsigned char ossl_der_oid_id_Ed25519[DER_OID_SZ_id_Ed25519]; + +/* + * id-Ed448 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 113 } + */ +#define DER_OID_V_id_Ed448 DER_P_OBJECT, 3, 0x2B, 0x65, 0x71 +#define DER_OID_SZ_id_Ed448 5 +extern const unsigned char ossl_der_oid_id_Ed448[DER_OID_SZ_id_Ed448]; + + +int ossl_DER_w_algorithmIdentifier_ED25519(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_ED448(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_X25519(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_X448(WPACKET *pkt, int cont, ECX_KEY *ec); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_rsa.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_rsa.h new file mode 100644 index 00000000000..5ec3c515a1b --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_rsa.h @@ -0,0 +1,187 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_rsa.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/rsa.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * hashAlgs OBJECT IDENTIFIER ::= { nistAlgorithms 2 } + */ +#define DER_OID_V_hashAlgs DER_P_OBJECT, 8, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02 +#define DER_OID_SZ_hashAlgs 10 +extern const unsigned char ossl_der_oid_hashAlgs[DER_OID_SZ_hashAlgs]; + +/* + * rsaEncryption OBJECT IDENTIFIER ::= { pkcs-1 1 } + */ +#define DER_OID_V_rsaEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01 +#define DER_OID_SZ_rsaEncryption 11 +extern const unsigned char ossl_der_oid_rsaEncryption[DER_OID_SZ_rsaEncryption]; + +/* + * id-RSAES-OAEP OBJECT IDENTIFIER ::= { pkcs-1 7 } + */ +#define DER_OID_V_id_RSAES_OAEP DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x07 +#define DER_OID_SZ_id_RSAES_OAEP 11 +extern const unsigned char ossl_der_oid_id_RSAES_OAEP[DER_OID_SZ_id_RSAES_OAEP]; + +/* + * id-pSpecified OBJECT IDENTIFIER ::= { pkcs-1 9 } + */ +#define DER_OID_V_id_pSpecified DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x09 +#define DER_OID_SZ_id_pSpecified 11 +extern const unsigned char ossl_der_oid_id_pSpecified[DER_OID_SZ_id_pSpecified]; + +/* + * id-RSASSA-PSS OBJECT IDENTIFIER ::= { pkcs-1 10 } + */ +#define DER_OID_V_id_RSASSA_PSS DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0A +#define DER_OID_SZ_id_RSASSA_PSS 11 +extern const unsigned char ossl_der_oid_id_RSASSA_PSS[DER_OID_SZ_id_RSASSA_PSS]; + +/* + * md2WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 2 } + */ +#define DER_OID_V_md2WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x02 +#define DER_OID_SZ_md2WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md2WithRSAEncryption[DER_OID_SZ_md2WithRSAEncryption]; + +/* + * md5WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 4 } + */ +#define DER_OID_V_md5WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x04 +#define DER_OID_SZ_md5WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md5WithRSAEncryption[DER_OID_SZ_md5WithRSAEncryption]; + +/* + * sha1WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 5 } + */ +#define DER_OID_V_sha1WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05 +#define DER_OID_SZ_sha1WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha1WithRSAEncryption[DER_OID_SZ_sha1WithRSAEncryption]; + +/* + * sha224WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 14 } + */ +#define DER_OID_V_sha224WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E +#define DER_OID_SZ_sha224WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha224WithRSAEncryption[DER_OID_SZ_sha224WithRSAEncryption]; + +/* + * sha256WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 11 } + */ +#define DER_OID_V_sha256WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B +#define DER_OID_SZ_sha256WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha256WithRSAEncryption[DER_OID_SZ_sha256WithRSAEncryption]; + +/* + * sha384WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 12 } + */ +#define DER_OID_V_sha384WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0C +#define DER_OID_SZ_sha384WithRSAEncryption 11 +extern const unsigned char 
ossl_der_oid_sha384WithRSAEncryption[DER_OID_SZ_sha384WithRSAEncryption]; + +/* + * sha512WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 13 } + */ +#define DER_OID_V_sha512WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0D +#define DER_OID_SZ_sha512WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512WithRSAEncryption[DER_OID_SZ_sha512WithRSAEncryption]; + +/* + * sha512-224WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 15 } + */ +#define DER_OID_V_sha512_224WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0F +#define DER_OID_SZ_sha512_224WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512_224WithRSAEncryption[DER_OID_SZ_sha512_224WithRSAEncryption]; + +/* + * sha512-256WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 16 } + */ +#define DER_OID_V_sha512_256WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x10 +#define DER_OID_SZ_sha512_256WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512_256WithRSAEncryption[DER_OID_SZ_sha512_256WithRSAEncryption]; + +/* + * id-mgf1 OBJECT IDENTIFIER ::= { pkcs-1 8 } + */ +#define DER_OID_V_id_mgf1 DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x08 +#define DER_OID_SZ_id_mgf1 11 +extern const unsigned char ossl_der_oid_id_mgf1[DER_OID_SZ_id_mgf1]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 13 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0D +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_224[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_224]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 14 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0E +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_256[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_256]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 15 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0F +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_384[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_384]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 16 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x10 +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_512[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_512]; + +/* + * md4WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 3 } + */ +#define DER_OID_V_md4WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x03 +#define DER_OID_SZ_md4WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md4WithRSAEncryption[DER_OID_SZ_md4WithRSAEncryption]; + +/* + * ripemd160WithRSAEncryption OBJECT IDENTIFIER ::= { + * iso(1) identified-organization(3) teletrust(36) algorithm(3) signatureAlgorithm(3) rsaSignature(1) 2 + * } + */ +#define DER_OID_V_ripemd160WithRSAEncryption DER_P_OBJECT, 6, 0x2B, 0x24, 0x03, 0x03, 0x01, 0x02 
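/*
 * Aside (not part of the generated header): each DER_OID_V_* vector in these
 * files is the tag/length/content encoding of the dotted OID given in the
 * comment above it. For ripemd160WithRSAEncryption, 1.3.36.3.3.1.2, the first
 * two arcs collapse into one byte (40 * 1 + 3 = 0x2B) and every later arc is
 * below 128, so each encodes as a single base-128 byte, giving the six
 * content bytes 2B 24 03 03 01 02; the matching DER_OID_SZ_* adds the tag and
 * length bytes (6 + 2 = 8). A minimal sketch of that arithmetic follows; it
 * only handles the small-arc case (every arc after the first pair < 128),
 * which covers this OID, and print_small_oid is an illustrative helper, not
 * an OpenSSL API.
 */
#include <stdio.h>

static void print_small_oid(const unsigned *arcs, size_t n)
{
    /* First content byte packs the first two arcs: 40 * arc1 + arc2. */
    printf("%02X", 40 * arcs[0] + arcs[1]);
    /* Arcs below 128 encode as one base-128 byte each. */
    for (size_t i = 2; i < n; i++)
        printf(" %02X", arcs[i]);
    printf("\n");
}

int main(void)
{
    const unsigned ripemd160_rsa[] = { 1, 3, 36, 3, 3, 1, 2 };
    print_small_oid(ripemd160_rsa, 7);   /* prints: 2B 24 03 03 01 02 */
    return 0;
}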
+#define DER_OID_SZ_ripemd160WithRSAEncryption 8 +extern const unsigned char ossl_der_oid_ripemd160WithRSAEncryption[DER_OID_SZ_ripemd160WithRSAEncryption]; + +/* + * mdc2WithRSASignature OBJECT IDENTIFIER ::= { + * iso(1) identified-organization(3) oiw(14) secsig(3) algorithms(2) mdc2WithRSASignature(14) + * } + */ +#define DER_OID_V_mdc2WithRSASignature DER_P_OBJECT, 5, 0x2B, 0x0E, 0x03, 0x02, 0x0E +#define DER_OID_SZ_mdc2WithRSASignature 7 +extern const unsigned char ossl_der_oid_mdc2WithRSASignature[DER_OID_SZ_mdc2WithRSASignature]; + + +/* PSS parameters */ +int ossl_DER_w_RSASSA_PSS_params(WPACKET *pkt, int tag, + const RSA_PSS_PARAMS_30 *pss); +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_RSA(WPACKET *pkt, int tag, RSA *rsa); +int ossl_DER_w_algorithmIdentifier_RSA_PSS(WPACKET *pkt, int tag, + int rsa_type, + const RSA_PSS_PARAMS_30 *pss); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_MDWithRSAEncryption(WPACKET *pkt, int tag, + int mdnid); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_sm2.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_sm2.h new file mode 100644 index 00000000000..9d41b31265c --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_sm2.h @@ -0,0 +1,37 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_sm2.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/ec.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * sm2-with-SM3 OBJECT IDENTIFIER ::= { sm-scheme 501 } + */ +#define DER_OID_V_sm2_with_SM3 DER_P_OBJECT, 8, 0x2A, 0x81, 0x1C, 0xCF, 0x55, 0x01, 0x83, 0x75 +#define DER_OID_SZ_sm2_with_SM3 10 +extern const unsigned char ossl_der_oid_sm2_with_SM3[DER_OID_SZ_sm2_with_SM3]; + +/* + * curveSM2 OBJECT IDENTIFIER ::= { sm-scheme 301 } + */ +#define DER_OID_V_curveSM2 DER_P_OBJECT, 8, 0x2A, 0x81, 0x1C, 0xCF, 0x55, 0x01, 0x82, 0x2D +#define DER_OID_SZ_curveSM2 10 +extern const unsigned char ossl_der_oid_curveSM2[DER_OID_SZ_curveSM2]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_SM2(WPACKET *pkt, int cont, EC_KEY *ec); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_SM2_with_MD(WPACKET *pkt, int cont, + EC_KEY *ec, int mdnid); diff --git a/contrib/openssl-cmake/linux_loongarch64/include/prov/der_wrap.h b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_wrap.h new file mode 100644 index 00000000000..ff295403772 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include/prov/der_wrap.h @@ -0,0 +1,46 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_wrap.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * id-alg-CMS3DESwrap OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-9(9) smime(16) alg(3) 6 + * } + */ +#define DER_OID_V_id_alg_CMS3DESwrap DER_P_OBJECT, 11, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x10, 0x03, 0x06 +#define DER_OID_SZ_id_alg_CMS3DESwrap 13 +extern const unsigned char ossl_der_oid_id_alg_CMS3DESwrap[DER_OID_SZ_id_alg_CMS3DESwrap]; + +/* + * id-aes128-wrap OBJECT IDENTIFIER ::= { aes 5 } + */ +#define DER_OID_V_id_aes128_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x05 +#define DER_OID_SZ_id_aes128_wrap 11 +extern const unsigned char ossl_der_oid_id_aes128_wrap[DER_OID_SZ_id_aes128_wrap]; + +/* + * id-aes192-wrap OBJECT IDENTIFIER ::= { aes 25 } + */ +#define DER_OID_V_id_aes192_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x19 +#define DER_OID_SZ_id_aes192_wrap 11 +extern const unsigned char ossl_der_oid_id_aes192_wrap[DER_OID_SZ_id_aes192_wrap]; + +/* + * id-aes256-wrap OBJECT IDENTIFIER ::= { aes 45 } + */ +#define DER_OID_V_id_aes256_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x2D +#define DER_OID_SZ_id_aes256_wrap 11 +extern const unsigned char ossl_der_oid_id_aes256_wrap[DER_OID_SZ_id_aes256_wrap]; + diff --git a/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h b/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h new file mode 100644 index 00000000000..a3a6485bbe9 --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h @@ -0,0 +1,30 @@ +/* + * WARNING: do not edit! + * Generated by util/mkbuildinf.pl + * + * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#define PLATFORM "platform: linux64-loongarch64" +#define DATE "built on: Thu Apr 18 07:53:56 2024 UTC" + +/* + * Generate compiler_flags as an array of individual characters. This is a + * workaround for the situation where CFLAGS gets too long for a C90 string + * literal + */ +static const char compiler_flags[] = { + 'c','o','m','p','i','l','e','r',':',' ','g','c','c',' ','-','f', + 'P','I','C',' ','-','p','t','h','r','e','a','d',' ','-','W','a', + 'l','l',' ','-','O','3',' ','-','D','O','P','E','N','S','S','L', + '_','U','S','E','_','N','O','D','E','L','E','T','E',' ','-','D', + 'L','_','E','N','D','I','A','N',' ','-','D','O','P','E','N','S', + 'S','L','_','P','I','C',' ','-','D','O','P','E','N','S','S','L', + '_','B','U','I','L','D','I','N','G','_','O','P','E','N','S','S', + 'L',' ','-','D','N','D','E','B','U','G','\0' +}; diff --git a/contrib/openssl-cmake/linux_loongarch64/params_idx.c b/contrib/openssl-cmake/linux_loongarch64/params_idx.c new file mode 100644 index 00000000000..9e453c26fdc --- /dev/null +++ b/contrib/openssl-cmake/linux_loongarch64/params_idx.c @@ -0,0 +1,2710 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from crypto/params_idx.c.in + * + * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). 
You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +#include "internal/e_os.h" +#include "internal/param_names.h" +#include <string.h> + +/* Machine generated TRIE -- generated by util/perl/OpenSSL/paramnames.pm */ +int ossl_param_find_pidx(const char *s) +{ + switch(s[0]) { + default: + break; + case 'a': + switch(s[1]) { + default: + break; + case 'c': + if (strcmp("vp-info", s + 2) == 0) + return PIDX_KDF_PARAM_X942_ACVPINFO; + break; + case 'd': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_KDF_PARAM_ARGON2_AD; + } + break; + case 'e': + if (strcmp("ad", s + 2) == 0) + return PIDX_CIPHER_PARAM_AEAD; + break; + case 'l': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case '_': + if (strcmp("id_param", s + 4) == 0) + return PIDX_CIPHER_PARAM_ALGORITHM_ID_PARAMS; + break; + case 'i': + if (strcmp("d-absent", s + 4) == 0) + return PIDX_DIGEST_PARAM_ALGID_ABSENT; + break; + case 'o': + if (strcmp("rithm-id", s + 4) == 0) + return PIDX_SIGNATURE_PARAM_ALGORITHM_ID; + } + break; + case 'i': + if (strcmp("as", s + 3) == 0) + return PIDX_STORE_PARAM_ALIAS; + } + break; + case '\0': + return PIDX_PKEY_PARAM_EC_A; + } + break; + case 'b': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("sis-type", s + 2) == 0) + return PIDX_PKEY_PARAM_EC_CHAR2_TYPE; + break; + case 'i': + if (strcmp("ts", s + 2) == 0) + return PIDX_PKEY_PARAM_BITS; + break; + case 'l': + switch(s[2]) { + default: + break; + case 'o': + switch(s[3]) { + default: + break; + case 'c': + switch(s[4]) { + default: + break; + case 'k': + switch(s[5]) { + default: + break; + case '-': + if (strcmp("size", s + 6) == 0) + return PIDX_MAC_PARAM_BLOCK_SIZE; + break; + case '_': + if (strcmp("padding", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING; + break; + case 's': + if (strcmp("ize", s + 6) == 0) + return PIDX_CIPHER_PARAM_BLOCK_SIZE; + } + } + } + } + break; + case 'u': + if (strcmp("ildinfo", s + 2) == 0) + return PIDX_PROV_PARAM_BUILDINFO; + break; + case '\0': + return PIDX_PKEY_PARAM_EC_B; + } + break; + case 'c': + switch(s[1]) { + default: + break; + case '-': + if (strcmp("rounds", s + 2) == 0) + return PIDX_MAC_PARAM_C_ROUNDS; + break; + case 'e': + if (strcmp("kalg", s + 2) == 0) + return PIDX_KDF_PARAM_CEK_ALG; + break; + case 'i': + if (strcmp("pher", s + 2) == 0) + return PIDX_ALG_PARAM_CIPHER; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'f': + if (strcmp("actor", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_COFACTOR; + break; + case 'n': + switch(s[3]) { + default: + break; + case 's': + if (strcmp("tant", s + 4) == 0) + return PIDX_KDF_PARAM_CONSTANT; + break; + case 't': + if (strcmp("ext-string", s + 4) == 0) + return PIDX_SIGNATURE_PARAM_CONTEXT_STRING; + } + } + break; + case 't': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case '_': + if (strcmp("mode", s + 4) == 0) + return PIDX_CIPHER_PARAM_CTS_MODE; + break; + case '\0': + return PIDX_CIPHER_PARAM_CTS; + } + } + break; + case 'u': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'o': + switch(s[5]) { + default: + break; + case 'm': + switch(s[6]) { + default: + break; + case '-': + if (strcmp("iv", s + 7) == 0) + return PIDX_CIPHER_PARAM_CUSTOM_IV; + break; + case '\0': + return 
PIDX_MAC_PARAM_CUSTOM; + } + } + } + } + } + } + break; + case 'd': + switch(s[1]) { + default: + break; + case '-': + if (strcmp("rounds", s + 2) == 0) + return PIDX_MAC_PARAM_D_ROUNDS; + break; + case 'a': + switch(s[2]) { + default: + break; + case 't': + switch(s[3]) { + default: + break; + case 'a': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 's': + if (strcmp("tructure", s + 6) == 0) + return PIDX_OBJECT_PARAM_DATA_STRUCTURE; + break; + case 't': + if (strcmp("ype", s + 6) == 0) + return PIDX_OBJECT_PARAM_DATA_TYPE; + } + break; + case '\0': + return PIDX_KDF_PARAM_DATA; + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'c': + if (strcmp("oded-from-explicit", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS; + break; + case 'f': + if (strcmp("ault-digest", s + 3) == 0) + return PIDX_PKEY_PARAM_DEFAULT_DIGEST; + break; + case 's': + if (strcmp("c", s + 3) == 0) + return PIDX_OBJECT_PARAM_DESC; + } + break; + case 'h': + if (strcmp("kem-ikm", s + 2) == 0) + return PIDX_PKEY_PARAM_DHKEM_IKM; + break; + case 'i': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 's': + switch(s[5]) { + default: + break; + case 't': + switch(s[6]) { + default: + break; + case '-': + switch(s[7]) { + default: + break; + case 'n': + if (strcmp("oinit", s + 8) == 0) + return PIDX_MAC_PARAM_DIGEST_NOINIT; + break; + case 'o': + if (strcmp("neshot", s + 8) == 0) + return PIDX_MAC_PARAM_DIGEST_ONESHOT; + break; + case 'p': + if (strcmp("rops", s + 8) == 0) + return PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS; + break; + case 's': + if (strcmp("ize", s + 8) == 0) + return PIDX_PKEY_PARAM_DIGEST_SIZE; + } + break; + case '\0': + return PIDX_STORE_PARAM_DIGEST; + } + } + } + } + break; + case 's': + if (strcmp("tid", s + 3) == 0) + return PIDX_PKEY_PARAM_DIST_ID; + } + break; + case 'r': + if (strcmp("bg-no-trunc-md", s + 2) == 0) + return PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST; + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_D; + } + break; + case 'e': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("rly_clean", s + 2) == 0) + return PIDX_KDF_PARAM_EARLY_CLEAN; + break; + case 'c': + if (strcmp("dh-cofactor-mode", s + 2) == 0) + return PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE; + break; + case 'n': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'o': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'e': + if (strcmp("d-pub-key", s + 6) == 0) + return PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY; + break; + case 'i': + if (strcmp("ng", s + 6) == 0) + return PIDX_PKEY_PARAM_EC_ENCODING; + } + } + break; + case 'r': + if (strcmp("ypt-level", s + 4) == 0) + return PIDX_ENCODER_PARAM_ENCRYPT_LEVEL; + } + break; + case 'g': + if (strcmp("ine", s + 3) == 0) + return PIDX_ALG_PARAM_ENGINE; + break; + case 't': + switch(s[3]) { + default: + break; + case 'r': + switch(s[4]) { + default: + break; + case 'o': + switch(s[5]) { + default: + break; + case 'p': + switch(s[6]) { + default: + break; + case 'y': + switch(s[7]) { + default: + break; + case '_': + if (strcmp("required", s + 8) == 0) + return PIDX_DRBG_PARAM_ENTROPY_REQUIRED; + break; + case '\0': + return PIDX_KDF_PARAM_HMACDRBG_ENTROPY; + } + } + } + } + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_E; + break; + case 'x': + if (strcmp("pect", s + 2) == 0) + return 
PIDX_STORE_PARAM_EXPECT; + } + break; + case 'f': + switch(s[1]) { + default: + break; + case 'i': + switch(s[2]) { + default: + break; + case 'e': + if (strcmp("ld-type", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_FIELD_TYPE; + break; + case 'n': + if (strcmp("gerprint", s + 3) == 0) + return PIDX_STORE_PARAM_FINGERPRINT; + } + } + break; + case 'g': + switch(s[1]) { + default: + break; + case 'e': + switch(s[2]) { + default: + break; + case 'n': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'r': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case '\0': + return PIDX_RAND_PARAM_GENERATE; + } + break; + case 'o': + if (strcmp("r", s + 8) == 0) + return PIDX_PKEY_PARAM_EC_GENERATOR; + } + } + } + } + } + } + break; + case 'i': + if (strcmp("ndex", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_GINDEX; + break; + case 'r': + switch(s[2]) { + default: + break; + case 'o': + switch(s[3]) { + default: + break; + case 'u': + switch(s[4]) { + default: + break; + case 'p': + switch(s[5]) { + default: + break; + case '-': + if (strcmp("check", s + 6) == 0) + return PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE; + break; + case '\0': + return PIDX_PKEY_PARAM_GROUP_NAME; + } + } + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_G; + } + break; + case 'h': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("s-randkey", s + 2) == 0) + return PIDX_CIPHER_PARAM_HAS_RAND_KEY; + break; + case 'i': + if (strcmp("ndex", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_H; + } + break; + case 'i': + switch(s[1]) { + default: + break; + case 'd': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_KDF_PARAM_PKCS12_ID; + } + break; + case 'k': + if (strcmp("me", s + 2) == 0) + return PIDX_KEM_PARAM_IKME; + break; + case 'm': + if (strcmp("plicit-rejection", s + 2) == 0) + return PIDX_PKEY_PARAM_IMPLICIT_REJECTION; + break; + case 'n': + switch(s[2]) { + default: + break; + case 'c': + if (strcmp("lude-public", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC; + break; + case 'f': + if (strcmp("o", s + 3) == 0) + return PIDX_PASSPHRASE_PARAM_INFO; + break; + case 'p': + if (strcmp("ut-type", s + 3) == 0) + return PIDX_STORE_PARAM_INPUT_TYPE; + break; + case 's': + if (strcmp("tance", s + 3) == 0) + return PIDX_SIGNATURE_PARAM_INSTANCE; + } + break; + case 't': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'r': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("tion", s + 5) == 0) + return PIDX_GEN_PARAM_ITERATION; + break; + case '\0': + return PIDX_KDF_PARAM_ITER; + } + } + } + break; + case 'v': + switch(s[2]) { + default: + break; + case 'l': + if (strcmp("en", s + 3) == 0) + return PIDX_CIPHER_PARAM_IVLEN; + break; + case '\0': + return PIDX_MAC_PARAM_IV; + } + } + break; + case 'j': + switch(s[1]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_COFACTOR; + } + break; + case 'k': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K2; + } + break; + case '3': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K3; + } + break; + case 'a': + if (strcmp("t", s + 2) == 0) + return PIDX_SIGNATURE_PARAM_KAT; + 
break; + case 'd': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + default: + break; + case 'g': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case 's': + switch(s[9]) { + default: + break; + case 't': + switch(s[10]) { + default: + break; + case '-': + if (strcmp("props", s + 11) == 0) + return PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS; + break; + case '\0': + return PIDX_EXCHANGE_PARAM_KDF_DIGEST; + } + } + } + } + } + } + break; + case 'o': + if (strcmp("utlen", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_OUTLEN; + break; + case 't': + if (strcmp("ype", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_TYPE; + break; + case 'u': + if (strcmp("km", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_UKM; + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'y': + switch(s[3]) { + default: + break; + case 'b': + if (strcmp("its", s + 4) == 0) + return PIDX_CIPHER_PARAM_RC2_KEYBITS; + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_CIPHER_PARAM_KEYLEN; + break; + case '\0': + return PIDX_KDF_PARAM_KEY; + } + } + } + break; + case 'l': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'b': + if (strcmp("el", s + 3) == 0) + return PIDX_KDF_PARAM_LABEL; + break; + case 'n': + if (strcmp("es", s + 3) == 0) + return PIDX_KDF_PARAM_ARGON2_LANES; + } + } + break; + case 'm': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'k': + if (strcmp("ey", s + 4) == 0) + return PIDX_CIPHER_PARAM_AEAD_MAC_KEY; + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_KDF_PARAM_MAC_SIZE; + break; + case '\0': + return PIDX_ALG_PARAM_MAC; + } + break; + case 'n': + if (strcmp("datory-digest", s + 3) == 0) + return PIDX_PKEY_PARAM_MANDATORY_DIGEST; + break; + case 'x': + switch(s[3]) { + default: + break; + case '-': + if (strcmp("size", s + 4) == 0) + return PIDX_PKEY_PARAM_MAX_SIZE; + break; + case '_': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("dinlen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_ADINLEN; + break; + case 'e': + switch(s[5]) { + default: + break; + case 'a': + if (strcmp("rly_data", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA; + break; + case 'n': + if (strcmp("tropylen", s + 6) == 0) + return PIDX_DRBG_PARAM_MAX_ENTROPYLEN; + } + break; + case 'f': + if (strcmp("rag_len", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN; + break; + case 'n': + if (strcmp("oncelen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_NONCELEN; + break; + case 'p': + if (strcmp("erslen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_PERSLEN; + break; + case 'r': + if (strcmp("equest", s + 5) == 0) + return PIDX_RAND_PARAM_MAX_REQUEST; + } + break; + case 'i': + if (strcmp("um_length", s + 4) == 0) + return PIDX_DRBG_PARAM_MAX_LENGTH; + break; + case 'm': + if (strcmp("em_bytes", s + 4) == 0) + return PIDX_KDF_PARAM_SCRYPT_MAXMEM; + } + } + break; + case 'e': + if (strcmp("mcost", s + 2) == 0) + return PIDX_KDF_PARAM_ARGON2_MEMCOST; + break; + case 'g': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case '1': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'd': + if (strcmp("igest", s + 6) == 0) + return 
PIDX_PKEY_PARAM_MGF1_DIGEST; + break; + case 'p': + if (strcmp("roperties", s + 6) == 0) + return PIDX_PKEY_PARAM_MGF1_PROPERTIES; + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_MASKGENFUNC; + } + } + break; + case 'i': + switch(s[2]) { + default: + break; + case 'c': + if (strcmp("alg", s + 3) == 0) + return PIDX_DIGEST_PARAM_MICALG; + break; + case 'n': + switch(s[3]) { + default: + break; + case '_': + switch(s[4]) { + default: + break; + case 'e': + if (strcmp("ntropylen", s + 5) == 0) + return PIDX_DRBG_PARAM_MIN_ENTROPYLEN; + break; + case 'n': + if (strcmp("oncelen", s + 5) == 0) + return PIDX_DRBG_PARAM_MIN_NONCELEN; + } + break; + case 'i': + if (strcmp("um_length", s + 4) == 0) + return PIDX_DRBG_PARAM_MIN_LENGTH; + } + } + break; + case 'o': + switch(s[2]) { + default: + break; + case 'd': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case '\0': + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE; + } + break; + case 'u': + if (strcmp("le-filename", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_MODULE_FILENAME; + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_M; + } + break; + case 'n': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("me", s + 2) == 0) + return PIDX_STORE_PARAM_ISSUER; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'n': + switch(s[3]) { + default: + break; + case 'c': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case '-': + if (strcmp("type", s + 6) == 0) + return PIDX_SIGNATURE_PARAM_NONCE_TYPE; + break; + case '\0': + return PIDX_KDF_PARAM_HMACDRBG_NONCE; + } + } + } + } + break; + case 'u': + if (strcmp("m", s + 2) == 0) + return PIDX_CIPHER_PARAM_NUM; + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_N; + } + break; + case 'o': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("ep-label", s + 2) == 0) + return PIDX_ASYM_CIPHER_PARAM_OAEP_LABEL; + break; + case 'p': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'n': + if (strcmp("ssl-version", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_VERSION; + break; + case 'r': + if (strcmp("ation", s + 4) == 0) + return PIDX_KEM_PARAM_OPERATION; + } + break; + case 't': + if (strcmp("ions", s + 3) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS; + } + break; + case 'r': + if (strcmp("der", s + 2) == 0) + return PIDX_PKEY_PARAM_EC_ORDER; + } + break; + case 'p': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_P1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_P2; + } + break; + case 'a': + switch(s[2]) { + default: + break; + case 'd': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'm': + if (strcmp("ode", s + 5) == 0) + return PIDX_PKEY_PARAM_PAD_MODE; + break; + case 't': + if (strcmp("ype", s + 5) == 0) + return PIDX_DIGEST_PARAM_PAD_TYPE; + } + break; + case 'd': + if (strcmp("ing", s + 4) == 0) + return PIDX_CIPHER_PARAM_PADDING; + break; + case '\0': + return PIDX_EXCHANGE_PARAM_PAD; + } + break; + case 'r': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'y': + switch(s[5]) { + default: + break; + case 'u': + if (strcmp("-info", s + 6) == 0) + return PIDX_KDF_PARAM_X942_PARTYUINFO; + break; + case 'v': + if (strcmp("-info", s + 6) == 0) + return PIDX_KDF_PARAM_X942_PARTYVINFO; + } + } + } 
+ break; + case 's': + if (strcmp("s", s + 3) == 0) + return PIDX_KDF_PARAM_PASSWORD; + } + break; + case 'b': + if (strcmp("its", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_PBITS; + break; + case 'c': + if (strcmp("ounter", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_PCOUNTER; + break; + case 'k': + if (strcmp("cs5", s + 2) == 0) + return PIDX_KDF_PARAM_PKCS5; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'i': + if (strcmp("nt-format", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT; + break; + case 't': + if (strcmp("ential", s + 3) == 0) + return PIDX_GEN_PARAM_POTENTIAL; + } + break; + case 'r': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'd': + if (strcmp("iction_resistance", s + 4) == 0) + return PIDX_DRBG_PARAM_PREDICTION_RESISTANCE; + break; + case 'f': + if (strcmp("ix", s + 4) == 0) + return PIDX_KDF_PARAM_PREFIX; + } + break; + case 'i': + switch(s[3]) { + default: + break; + case 'm': + if (strcmp("es", s + 4) == 0) + return PIDX_PKEY_PARAM_RSA_PRIMES; + break; + case 'v': + switch(s[4]) { + default: + break; + case '_': + if (strcmp("len", s + 5) == 0) + return PIDX_PKEY_PARAM_DH_PRIV_LEN; + break; + case '\0': + return PIDX_PKEY_PARAM_PRIV_KEY; + } + } + break; + case 'o': + switch(s[3]) { + default: + break; + case 'p': + if (strcmp("erties", s + 4) == 0) + return PIDX_ALG_PARAM_PROPERTIES; + break; + case 'v': + if (strcmp("ider-name", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_PROV_NAME; + } + } + break; + case 'u': + if (strcmp("b", s + 2) == 0) + return PIDX_PKEY_PARAM_PUB_KEY; + break; + case '\0': + return PIDX_KDF_PARAM_SCRYPT_P; + } + break; + case 'q': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_Q1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_Q2; + } + break; + case 'b': + if (strcmp("its", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_QBITS; + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_Q; + break; + case 'x': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_PUB_X; + } + break; + case 'y': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_PUB_Y; + } + } + break; + case 'r': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'n': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case 'k': + if (strcmp("ey", s + 5) == 0) + return PIDX_CIPHER_PARAM_RANDOM_KEY; + break; + case 'o': + if (strcmp("m_data", s + 5) == 0) + return PIDX_DRBG_PARAM_RANDOM_DATA; + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'a': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case '_': + switch(s[5]) { + default: + break; + case 'a': + if (strcmp("head", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD; + break; + case 'b': + if (strcmp("uffer_len", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN; + } + } + } + break; + case 'f': + if (strcmp("erence", s + 3) == 0) + return PIDX_OBJECT_PARAM_REFERENCE; + break; + case 's': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case 'd': + switch(s[6]) { + default: + break; + case '_': + switch(s[7]) { + default: + break; + case 'c': + if (strcmp("ounter", s + 8) == 0) + return 
PIDX_DRBG_PARAM_RESEED_COUNTER; + break; + case 'r': + if (strcmp("equests", s + 8) == 0) + return PIDX_DRBG_PARAM_RESEED_REQUESTS; + break; + case 't': + switch(s[8]) { + default: + break; + case 'i': + switch(s[9]) { + default: + break; + case 'm': + switch(s[10]) { + default: + break; + case 'e': + switch(s[11]) { + default: + break; + case '_': + if (strcmp("interval", s + 12) == 0) + return PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL; + break; + case '\0': + return PIDX_DRBG_PARAM_RESEED_TIME; + } + } + } + } + } + } + } + } + } + } + break; + case 'o': + if (strcmp("unds", s + 2) == 0) + return PIDX_CIPHER_PARAM_ROUNDS; + break; + case 's': + switch(s[2]) { + default: + break; + case 'a': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'c': + switch(s[5]) { + default: + break; + case 'o': + switch(s[6]) { + default: + break; + case 'e': + switch(s[7]) { + default: + break; + case 'f': + switch(s[8]) { + default: + break; + case 'f': + switch(s[9]) { + default: + break; + case 'i': + switch(s[10]) { + default: + break; + case 'c': + switch(s[11]) { + default: + break; + case 'i': + switch(s[12]) { + default: + break; + case 'e': + switch(s[13]) { + default: + break; + case 'n': + switch(s[14]) { + default: + break; + case 't': + switch(s[15]) { + default: + break; + case '1': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT1; + } + break; + case '2': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT2; + } + break; + case '3': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT3; + } + break; + case '4': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT4; + } + break; + case '5': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT5; + } + break; + case '6': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT6; + } + break; + case '7': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT7; + } + break; + case '8': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT8; + } + break; + case '9': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT; + } + } + } + } + } + } + } + } + } + } + } + break; + case 'e': + switch(s[5]) { + default: + break; + case 'x': + switch(s[6]) { + default: + break; + case 'p': + switch(s[7]) { + default: + break; + case 'o': + switch(s[8]) { + default: + break; + case 'n': + switch(s[9]) { + default: + break; + case 'e': + switch(s[10]) { + default: + break; + case 'n': + switch(s[11]) { + default: + break; + case 't': + switch(s[12]) { + default: + break; + case '1': + switch(s[13]) { + default: + break; + case '0': + switch(s[14]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT10; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT1; + } + break; + case '2': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT2; + } + break; + case '3': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT3; + } + break; + case '4': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT4; + } + break; + case '5': + switch(s[13]) { + default: + break; + case '\0': + return 
PIDX_PKEY_PARAM_RSA_EXPONENT5; + } + break; + case '6': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT6; + } + break; + case '7': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT7; + } + break; + case '8': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT8; + } + break; + case '9': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT; + } + } + } + } + } + } + } + } + break; + case 'f': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 'c': + switch(s[7]) { + default: + break; + case 't': + switch(s[8]) { + default: + break; + case 'o': + switch(s[9]) { + default: + break; + case 'r': + switch(s[10]) { + default: + break; + case '1': + switch(s[11]) { + default: + break; + case '0': + switch(s[12]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR10; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR1; + } + break; + case '2': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR2; + } + break; + case '3': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR3; + } + break; + case '4': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR4; + } + break; + case '5': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR5; + } + break; + case '6': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR6; + } + break; + case '7': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR7; + } + break; + case '8': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR8; + } + break; + case '9': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR; + } + } + } + } + } + } + } + } + } + break; + case '\0': + return PIDX_KDF_PARAM_SCRYPT_R; + } + break; + case 's': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'f': + if (strcmp("eprime-generator", s + 3) == 0) + return PIDX_PKEY_PARAM_DH_GENERATOR; + break; + case 'l': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'l': + if (strcmp("en", s + 5) == 0) + return PIDX_PKEY_PARAM_RSA_PSS_SALTLEN; + break; + case '\0': + return PIDX_KDF_PARAM_SALT; + } + } + break; + case 'v': + if (strcmp("e-parameters", s + 3) == 0) + return PIDX_ENCODER_PARAM_SAVE_PARAMETERS; + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'r': + if (strcmp("et", s + 4) == 0) + return PIDX_KDF_PARAM_SECRET; + break; + case 'u': + switch(s[4]) { + default: + break; + case 'r': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'y': + switch(s[8]) { + default: + break; + case '-': + switch(s[9]) { + default: + break; + case 'b': + if (strcmp("its", s + 10) == 0) + return PIDX_PKEY_PARAM_SECURITY_BITS; + break; + case 'c': + if (strcmp("hecks", s + 10) == 0) + return PIDX_PROV_PARAM_SECURITY_CHECKS; + } + } + } + } + } + } + } + break; + case 'e': + if (strcmp("d", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_SEED; + break; + case 
'r': + if (strcmp("ial", s + 3) == 0) + return PIDX_STORE_PARAM_SERIAL; + break; + case 's': + if (strcmp("sion_id", s + 3) == 0) + return PIDX_KDF_PARAM_SSHKDF_SESSION_ID; + } + break; + case 'i': + if (strcmp("ze", s + 2) == 0) + return PIDX_MAC_PARAM_SIZE; + break; + case 'p': + if (strcmp("eed", s + 2) == 0) + return PIDX_CIPHER_PARAM_SPEED; + break; + case 's': + if (strcmp("l3-ms", s + 2) == 0) + return PIDX_DIGEST_PARAM_SSL3_MS; + break; + case 't': + switch(s[2]) { + default: + break; + case '-': + switch(s[3]) { + default: + break; + case 'd': + if (strcmp("esc", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_DESC; + break; + case 'p': + if (strcmp("hase", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_PHASE; + break; + case 't': + if (strcmp("ype", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_TYPE; + } + break; + case 'a': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case '\0': + return PIDX_RAND_PARAM_STATE; + } + break; + case 'u': + if (strcmp("s", s + 5) == 0) + return PIDX_PROV_PARAM_STATUS; + } + } + break; + case 'r': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("m_mac", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC; + break; + case 'n': + if (strcmp("gth", s + 5) == 0) + return PIDX_RAND_PARAM_STRENGTH; + } + } + } + break; + case 'u': + switch(s[2]) { + default: + break; + case 'b': + if (strcmp("ject", s + 3) == 0) + return PIDX_STORE_PARAM_SUBJECT; + break; + case 'p': + switch(s[3]) { + default: + break; + case 'p': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'p': + switch(s[6]) { + default: + break; + case 'r': + if (strcmp("ivinfo", s + 7) == 0) + return PIDX_KDF_PARAM_X942_SUPP_PRIVINFO; + break; + case 'u': + if (strcmp("binfo", s + 7) == 0) + return PIDX_KDF_PARAM_X942_SUPP_PUBINFO; + } + } + } + } + } + } + break; + case 't': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_CIPHER_PARAM_AEAD_TAGLEN; + break; + case '\0': + return PIDX_CIPHER_PARAM_AEAD_TAG; + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case '_': + switch(s[5]) { + default: + break; + case 'e': + if (strcmp("ntropy", s + 6) == 0) + return PIDX_RAND_PARAM_TEST_ENTROPY; + break; + case 'n': + if (strcmp("once", s + 6) == 0) + return PIDX_RAND_PARAM_TEST_NONCE; + } + } + } + } + break; + case 'h': + if (strcmp("reads", s + 2) == 0) + return PIDX_KDF_PARAM_THREADS; + break; + case 'l': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'c': + if (strcmp("lient-version", s + 5) == 0) + return PIDX_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION; + break; + case 'd': + if (strcmp("ata-size", s + 5) == 0) + return PIDX_MAC_PARAM_TLS_DATA_SIZE; + break; + case 'g': + switch(s[5]) { + default: + break; + case 'r': + switch(s[6]) { + default: + break; + case 'o': + switch(s[7]) { + default: + break; + case 'u': + switch(s[8]) { + default: + break; + case 'p': + switch(s[9]) { + default: + break; + case '-': + switch(s[10]) { + default: + break; + case 'a': + if (strcmp("lg", s + 11) == 0) + return PIDX_CAPABILITY_TLS_GROUP_ALG; + break; + case 
'i': + switch(s[11]) { + default: + break; + case 'd': + switch(s[12]) { + default: + break; + case '\0': + return PIDX_CAPABILITY_TLS_GROUP_ID; + } + break; + case 's': + if (strcmp("-kem", s + 12) == 0) + return PIDX_CAPABILITY_TLS_GROUP_IS_KEM; + } + break; + case 'n': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'm': + switch(s[13]) { + default: + break; + case 'e': + switch(s[14]) { + default: + break; + case '-': + if (strcmp("internal", s + 15) == 0) + return PIDX_CAPABILITY_TLS_GROUP_NAME_INTERNAL; + break; + case '\0': + return PIDX_CAPABILITY_TLS_GROUP_NAME; + } + } + } + } + break; + case 's': + if (strcmp("ec-bits", s + 11) == 0) + return PIDX_CAPABILITY_TLS_GROUP_SECURITY_BITS; + } + } + } + } + } + } + break; + case 'm': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 'c': + switch(s[7]) { + default: + break; + case '-': + if (strcmp("size", s + 8) == 0) + return PIDX_CIPHER_PARAM_TLS_MAC_SIZE; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS_MAC; + } + break; + case 'x': + switch(s[7]) { + default: + break; + case '-': + switch(s[8]) { + default: + break; + case 'd': + if (strcmp("tls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_GROUP_MAX_DTLS; + break; + case 't': + if (strcmp("ls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS; + } + } + } + break; + case 'i': + switch(s[6]) { + default: + break; + case 'n': + switch(s[7]) { + default: + break; + case '-': + switch(s[8]) { + default: + break; + case 'd': + if (strcmp("tls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_GROUP_MIN_DTLS; + break; + case 't': + if (strcmp("ls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS; + } + } + } + break; + case 'u': + if (strcmp("lti", s + 6) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK; + } + break; + case 'n': + if (strcmp("egotiated-version", s + 5) == 0) + return PIDX_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION; + break; + case 's': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + default: + break; + case 'g': + switch(s[7]) { + default: + break; + case 'a': + switch(s[8]) { + default: + break; + case 'l': + switch(s[9]) { + default: + break; + case 'g': + switch(s[10]) { + default: + break; + case '-': + switch(s[11]) { + default: + break; + case 'c': + if (strcmp("ode-point", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_CODE_POINT; + break; + case 'h': + switch(s[12]) { + default: + break; + case 'a': + switch(s[13]) { + default: + break; + case 's': + switch(s[14]) { + default: + break; + case 'h': + switch(s[15]) { + default: + break; + case '-': + switch(s[16]) { + default: + break; + case 'n': + if (strcmp("ame", s + 17) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_HASH_NAME; + break; + case 'o': + if (strcmp("id", s + 17) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_HASH_OID; + } + } + } + } + } + break; + case 'i': + if (strcmp("ana-name", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME; + break; + case 'k': + switch(s[12]) { + default: + break; + case 'e': + switch(s[13]) { + default: + break; + case 'y': + switch(s[14]) { + default: + break; + case 't': + switch(s[15]) { + default: + break; + case 'y': + switch(s[16]) { + default: + break; + case 'p': + switch(s[17]) { + default: + break; + case 'e': + switch(s[18]) { + default: + break; + case '-': + if (strcmp("oid", s + 19) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID; + break; + case '\0': + return PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE; + } + } + } + } + } + } + } + 
break; + case 'n': + if (strcmp("ame", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_NAME; + break; + case 'o': + if (strcmp("id", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_OID; + break; + case 's': + switch(s[12]) { + default: + break; + case 'e': + if (strcmp("c-bits", s + 13) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SECURITY_BITS; + break; + case 'i': + switch(s[13]) { + default: + break; + case 'g': + switch(s[14]) { + default: + break; + case '-': + switch(s[15]) { + default: + break; + case 'n': + if (strcmp("ame", s + 16) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SIG_NAME; + break; + case 'o': + if (strcmp("id", s + 16) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SIG_OID; + } + } + } + } + } + } + } + } + } + } + } + break; + case 'v': + if (strcmp("ersion", s + 5) == 0) + return PIDX_CIPHER_PARAM_TLS_VERSION; + } + break; + case '1': + switch(s[4]) { + default: + break; + case '-': + if (strcmp("prf-ems-check", s + 5) == 0) + return PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK; + break; + case 'm': + switch(s[5]) { + default: + break; + case 'u': + switch(s[6]) { + default: + break; + case 'l': + switch(s[7]) { + default: + break; + case 't': + switch(s[8]) { + default: + break; + case 'i': + switch(s[9]) { + default: + break; + case '_': + switch(s[10]) { + default: + break; + case 'a': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'd': + switch(s[13]) { + default: + break; + case 'p': + if (strcmp("acklen", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD; + } + } + } + break; + case 'e': + switch(s[11]) { + default: + break; + case 'n': + switch(s[12]) { + default: + break; + case 'c': + switch(s[13]) { + default: + break; + case 'i': + if (strcmp("n", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN; + break; + case 'l': + if (strcmp("en", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC; + } + } + } + break; + case 'i': + if (strcmp("nterleave", s + 11) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE; + break; + case 'm': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'x': + switch(s[13]) { + default: + break; + case 'b': + if (strcmp("ufsz", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE; + break; + case 's': + if (strcmp("ndfrag", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT; + } + } + } + } + } + } + } + } + } + } + break; + case 'a': + switch(s[4]) { + default: + break; + case 'a': + switch(s[5]) { + default: + break; + case 'd': + switch(s[6]) { + default: + break; + case 'p': + if (strcmp("ad", s + 7) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_AAD_PAD; + break; + case '\0': + return PIDX_CIPHER_PARAM_AEAD_TLS1_AAD; + } + } + } + break; + case 'i': + switch(s[4]) { + default: + break; + case 'v': + switch(s[5]) { + default: + break; + case 'f': + if (strcmp("ixed", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_IV_FIXED; + break; + case 'g': + if (strcmp("en", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN; + break; + case 'i': + if (strcmp("nv", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV; + } + } + break; + case 't': + if (strcmp("ree", s + 4) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE; + } + } + break; + case 'p': + switch(s[2]) { + default: + break; + case '\0': + return 
PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS; + } + break; + case 'y': + if (strcmp("pe", s + 2) == 0) + return PIDX_OBJECT_PARAM_TYPE; + } + break; + case 'u': + switch(s[1]) { + default: + break; + case 'k': + if (strcmp("m", s + 2) == 0) + return PIDX_KDF_PARAM_UKM; + break; + case 'p': + if (strcmp("dated-iv", s + 2) == 0) + return PIDX_CIPHER_PARAM_UPDATED_IV; + break; + case 's': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'b': + if (strcmp("its", s + 5) == 0) + return PIDX_CIPHER_PARAM_USE_BITS; + break; + case 'c': + if (strcmp("ofactor-flag", s + 5) == 0) + return PIDX_PKEY_PARAM_USE_COFACTOR_FLAG; + break; + case 'k': + if (strcmp("eybits", s + 5) == 0) + return PIDX_KDF_PARAM_X942_USE_KEYBITS; + break; + case 'l': + switch(s[5]) { + default: + break; + case '\0': + return PIDX_KDF_PARAM_KBKDF_USE_L; + } + break; + case 's': + if (strcmp("eparator", s + 5) == 0) + return PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR; + } + break; + case '_': + switch(s[4]) { + default: + break; + case 'd': + if (strcmp("erivation_function", s + 5) == 0) + return PIDX_DRBG_PARAM_USE_DF; + break; + case 'e': + if (strcmp("tm", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM; + } + } + } + } + break; + case 'v': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'l': + switch(s[3]) { + default: + break; + case 'i': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case '-': + switch(s[9]) { + default: + break; + case 'g': + switch(s[10]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_VALIDATE_G; + } + break; + case 'l': + if (strcmp("egacy", s + 10) == 0) + return PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY; + break; + case 'p': + if (strcmp("q", s + 10) == 0) + return PIDX_PKEY_PARAM_FFC_VALIDATE_PQ; + } + } + } + } + } + } + } + } + break; + case 'e': + if (strcmp("rsion", s + 2) == 0) + return PIDX_PROV_PARAM_VERSION; + } + break; + case 'x': + switch(s[1]) { + default: + break; + case 'c': + if (strcmp("ghash", s + 2) == 0) + return PIDX_KDF_PARAM_SSHKDF_XCGHASH; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_DIGEST_PARAM_XOFLEN; + break; + case '\0': + return PIDX_MAC_PARAM_XOF; + } + } + break; + case 'p': + switch(s[2]) { + default: + break; + case '1': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP1; + } + break; + case '2': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP2; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP; + } + break; + case 'q': + switch(s[2]) { + default: + break; + case '1': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ1; + } + break; + case '2': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ2; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ; + } + break; + case 't': + if (strcmp("s_standard", s + 2) == 0) + return PIDX_CIPHER_PARAM_XTS_STANDARD; + } + } + return -1; +} + +/* End of TRIE */ From 47b7d3748dae344b00b2727dc13a342e0772dbf0 Mon Sep 17 00:00:00 2001 From: qiangxuhui Date: Sat, 20 Apr 2024 03:24:21 +0000 Subject: [PATCH 201/651] Fix code for 
loongarch64 Fix `base/poco` and `contrib/openssl-cmake` based on code review comments(https://github.com/ClickHouse/ClickHouse/pull/61509). --- base/poco/Foundation/include/Poco/Platform.h | 1 + contrib/openssl-cmake/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/base/poco/Foundation/include/Poco/Platform.h b/base/poco/Foundation/include/Poco/Platform.h index 2668d7d40a4..30937d0fc91 100644 --- a/base/poco/Foundation/include/Poco/Platform.h +++ b/base/poco/Foundation/include/Poco/Platform.h @@ -212,6 +212,7 @@ # define POCO_ARCH_LITTLE_ENDIAN 1 #elif defined(__loongarch64) # define POCO_ARCH POCO_ARCH_LOONGARCH64 +# define POCO_ARCH_LITTLE_ENDIAN 1 #endif diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt index f5c3df1a7bc..c0ab5f2d06d 100644 --- a/contrib/openssl-cmake/CMakeLists.txt +++ b/contrib/openssl-cmake/CMakeLists.txt @@ -210,7 +210,7 @@ elseif(ARCH_RISCV64) elseif(ARCH_LOONGARCH64) macro(perl_generate_asm FILE_IN FILE_OUT) add_custom_command(OUTPUT ${FILE_OUT} - COMMAND /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) + COMMAND ${CMAKE_COMMAND} -E env "CC=${CMAKE_CXX_COMPILER}" /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) endmacro() perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/loongarch64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/loongarch64cpuid.S) From 007c9be4db352567ef9a414a3aaecd1380d9de0d Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Tue, 14 May 2024 10:14:23 +0200 Subject: [PATCH 202/651] Restart CI From 537b56b1396d19ab3caa0bf546a2c546ae923dbf Mon Sep 17 00:00:00 2001 From: qiangxuhui Date: Tue, 14 May 2024 08:03:32 +0000 Subject: [PATCH 203/651] Add loongarch64 CI support --- .github/workflows/master.yml | 17 ++++++++--------- docker/packager/packager | 1 + tests/ci/ci_config.py | 8 ++++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 3ff9bc5693a..23650e7afc9 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -240,15 +240,13 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 BuilderBinS390X: - # disabled because s390x refused to build in the migration to OpenSSL - # BuilderBinS390X: - # needs: [RunConfig, BuilderDebRelease] - # if: ${{ !failure() && !cancelled() }} - # uses: ./.github/workflows/reusable_build.yml - # with: - # build_name: binary_s390x - # data: ${{ needs.RunConfig.outputs.data }} - # checkout_depth: 0 + needs: [RunConfig, BuilderDebRelease] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_build.yml + with: + build_name: binary_s390x + data: ${{ needs.RunConfig.outputs.data }} + checkout_depth: 0 BuilderBinLOONGARCH64: needs: [RunConfig, BuilderDebRelease] if: ${{ !failure() && !cancelled() }} @@ -308,6 +306,7 @@ jobs: - BuilderBinPPC64 - BuilderBinRISCV64 - BuilderBinS390X + - BuilderBinLOONGARCH64 - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy diff --git a/docker/packager/packager b/docker/packager/packager index f11cd30078b..2cda9b67893 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -149,6 +149,7 @@ def parse_env_variables( PPC_SUFFIX = "-ppc64le" RISCV_SUFFIX = "-riscv64" S390X_SUFFIX = "-s390x" + LOONGARCH_SUFFIX = "-loongarch64" AMD64_COMPAT_SUFFIX = "-amd64-compat" AMD64_MUSL_SUFFIX = "-amd64-musl" diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 588f4934125..642e444f308 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py 
@@ -79,6 +79,7 @@ class Build(metaclass=WithIter): BINARY_AMD64_MUSL = "binary_amd64_musl" BINARY_RISCV64 = "binary_riscv64" BINARY_S390X = "binary_s390x" + BINARY_LOONGARCH64 = "binary_loongarch64" FUZZERS = "fuzzers" @@ -1040,6 +1041,12 @@ CI_CONFIG = CIConfig( package_type="binary", static_binary_name="s390x", ), + Build.BINARY_LOONGARCH64: BuildConfig( + name=Build.BINARY_LOONGARCH64, + compiler="clang-18-loongarch64", + package_type="binary", + static_binary_name="loongarch64", + ), Build.FUZZERS: BuildConfig( name=Build.FUZZERS, compiler="clang-18", @@ -1070,6 +1077,7 @@ CI_CONFIG = CIConfig( Build.BINARY_PPC64LE, Build.BINARY_RISCV64, Build.BINARY_S390X, + Build.BINARY_LOONGARCH64, Build.BINARY_AMD64_COMPAT, Build.BINARY_AMD64_MUSL, Build.PACKAGE_RELEASE_COVERAGE, From 1eb72e7af3f7f4d9aed43e535712a4d41dd60b71 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 14 May 2024 09:25:23 +0000 Subject: [PATCH 204/651] Add no fasttest tag --- .../0_stateless/03036_schema_inference_cache_s3_archives.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql index bfa50f1ebe1..1a73eadbb53 100644 --- a/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql +++ b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + select * from s3(s3_conn, filename='03036_archive1.zip :: example{1,2}.csv') order by tuple(*); select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache order by file; From 13508de9a4176add6a8a9b163a603528c3162a81 Mon Sep 17 00:00:00 2001 From: qiangxuhui Date: Tue, 14 May 2024 10:49:09 +0000 Subject: [PATCH 205/651] Fix code according to review suggestions --- .github/workflows/master.yml | 4 ++-- cmake/target.cmake | 1 - docker/packager/packager | 6 ++++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 23650e7afc9..d2ea714e4e4 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -247,7 +247,7 @@ jobs: build_name: binary_s390x data: ${{ needs.RunConfig.outputs.data }} checkout_depth: 0 - BuilderBinLOONGARCH64: + BuilderBinLoongarch64: needs: [RunConfig, BuilderDebRelease] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_build.yml @@ -306,7 +306,7 @@ jobs: - BuilderBinPPC64 - BuilderBinRISCV64 - BuilderBinS390X - - BuilderBinLOONGARCH64 + - BuilderBinLoongarch64 - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy diff --git a/cmake/target.cmake b/cmake/target.cmake index 52d67725edf..3832e3cabf7 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -75,7 +75,6 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") set (ENABLE_DWARF_PARSER OFF CACHE INTERNAL "") set (ENABLE_BLAKE3 OFF CACHE INTERNAL "") - set (COMPILER_CACHE disabled CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/docker/packager/packager b/docker/packager/packager index 2cda9b67893..6e0c194b0b3 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -168,6 +168,7 @@ def parse_env_variables( is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_riscv = compiler.endswith(RISCV_SUFFIX) is_cross_s390x = compiler.endswith(S390X_SUFFIX) + is_cross_loongarch = 
compiler.endswith(LOONGARCH_SUFFIX)
     is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
     is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX)
     is_amd64_musl = compiler.endswith(AMD64_MUSL_SUFFIX)
@@ -236,6 +237,11 @@ def parse_env_variables(
         cmake_flags.append(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-s390x.cmake"
         )
+    elif is_cross_loongarch:
+        cc = compiler[: -len(LOONGARCH_SUFFIX)]
+        cmake_flags.append(
+            "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-loongarch64.cmake"
+        )
     elif is_amd64_compat:
         cc = compiler[: -len(AMD64_COMPAT_SUFFIX)]
         result.append("DEB_ARCH=amd64")

From c30dea2d30b92bcf80dc5240def379da20c3163e Mon Sep 17 00:00:00 2001
From: qiangxuhui
Date: Tue, 14 May 2024 11:05:55 +0000
Subject: [PATCH 206/651] Fix loongarch64's CI code

---
 docker/packager/packager | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/packager/packager b/docker/packager/packager
index 6e0c194b0b3..2dcbd8d695e 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -418,6 +418,7 @@ def parse_args() -> argparse.Namespace:
             "clang-18-ppc64le",
             "clang-18-riscv64",
             "clang-18-s390x",
+            "clang-18-loongarch64",
             "clang-18-amd64-compat",
             "clang-18-amd64-musl",
             "clang-18-freebsd",

From 3737e6fdea48d5aea308f08bf9b7684345b84165 Mon Sep 17 00:00:00 2001
From: divanik
Date: Tue, 14 May 2024 13:05:41 +0000
Subject: [PATCH 207/651] Possible fix for stability of schema_inference_cache
 test

---
 .../0_stateless/03036_schema_inference_cache_s3_archives.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql
index 1a73eadbb53..6f7134cfa38 100644
--- a/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql
+++ b/tests/queries/0_stateless/03036_schema_inference_cache_s3_archives.sql
@@ -1,5 +1,6 @@
 -- Tags: no-fasttest
 -- Tag no-fasttest: Depends on AWS
+SYSTEM DROP SCHEMA CACHE FOR S3;
 
 select * from s3(s3_conn, filename='03036_archive1.zip :: example{1,2}.csv') order by tuple(*);
 select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache order by file;
From 33885b27bfba856410ee18332f37489db753f228 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Tue, 14 May 2024 16:25:12 +0200
Subject: [PATCH 208/651] Fix fuzzer when only explicit faults are used

---
 src/Common/ThreadFuzzer.cpp | 26 ++++++++++++++++++--------
 src/Common/ThreadFuzzer.h   |  1 +
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp
index d1e252a8184..53f28047614 100644
--- a/src/Common/ThreadFuzzer.cpp
+++ b/src/Common/ThreadFuzzer.cpp
@@ -51,16 +51,17 @@ namespace ErrorCodes
 ThreadFuzzer::ThreadFuzzer()
 {
     initConfiguration();
+    if (needsSetup())
+        setup();
+
     if (!isEffective())
     {
         /// It has no effect - disable it
         stop();
         return;
     }
-    setup();
 }
 
-
 template <typename T>
 static void initFromEnv(T & what, const char * name)
 {
@@ -133,10 +134,16 @@ void ThreadFuzzer::initConfiguration()
 }
 
 
+bool ThreadFuzzer::needsSetup() const
+{
+    return cpu_time_period_us != 0
+        && (yield_probability > 0 || migrate_probability > 0 || (sleep_probability > 0 && sleep_time_us_max > 0));
+}
+
 bool ThreadFuzzer::isEffective() const
 {
-    if (!isStarted())
-        return false;
+    if (needsSetup())
+        return true;
 
 #if THREAD_FUZZER_WRAP_PTHREAD
 #    define CHECK_WRAPPER_PARAMS(RET, NAME, ...) \
@@ -163,10 +170,13 @@ bool ThreadFuzzer::isEffective() const
 #    undef INIT_WRAPPER_PARAMS
 #endif
 
-    return cpu_time_period_us != 0
-        && (yield_probability > 0
-            || migrate_probability > 0
-            || (sleep_probability > 0 && sleep_time_us_max > 0));
+    if (explicit_sleep_probability > 0 && sleep_time_us_max > 0)
+        return true;
+
+    if (explicit_memory_exception_probability > 0)
+        return true;
+
+    return false;
 }
 
 void ThreadFuzzer::stop()
diff --git a/src/Common/ThreadFuzzer.h b/src/Common/ThreadFuzzer.h
index e8f1fe606f0..b73587427de 100644
--- a/src/Common/ThreadFuzzer.h
+++ b/src/Common/ThreadFuzzer.h
@@ -52,6 +52,7 @@ public:
     }
 
     bool isEffective() const;
+    bool needsSetup() const;
 
     static void stop();
     static void start();
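The patch above splits one decision into two: needsSetup() answers whether the timer thread has anything to drive, while isEffective() answers whether any injection at all, timer-driven or explicit, can fire. Before the split, configuring only explicit faults left isEffective() false and the whole fuzzer was shut down. A minimal self-contained sketch of the intended logic, with plain atomics and free functions standing in for the real ThreadFuzzer members (illustrative only, not the actual interface):

#include <atomic>

// Sketch only, not part of the patch: simplified stand-ins for the
// env-configured ThreadFuzzer knobs.
static std::atomic<double> cpu_time_period_us{0};
static std::atomic<double> yield_probability{0};
static std::atomic<double> migrate_probability{0};
static std::atomic<double> sleep_probability{0};
static std::atomic<double> sleep_time_us_max{0};
static std::atomic<double> explicit_sleep_probability{0};
static std::atomic<double> explicit_memory_exception_probability{0};

/// True when the timer-driven part of the fuzzer has work to do,
/// i.e. the timer thread is worth starting.
static bool needsSetup()
{
    return cpu_time_period_us != 0
        && (yield_probability > 0 || migrate_probability > 0
            || (sleep_probability > 0 && sleep_time_us_max > 0));
}

/// True when any injection, timer-driven or explicit, can fire.
/// Before the patch this returned false whenever the timer was idle,
/// which silently disabled the explicit faults as well.
static bool isEffective()
{
    if (needsSetup())
        return true;
    if (explicit_sleep_probability > 0 && sleep_time_us_max > 0)
        return true;
    return explicit_memory_exception_probability > 0;
}

With this split, the constructor can call setup() only when the timer is needed and still keep the fuzzer alive for explicit faults.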
From 525b3d9d6143289d5d957106b616f9ec88203ecc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Tue, 14 May 2024 16:51:01 +0200
Subject: [PATCH 209/651] Fix division by 0

---
 src/Common/ThreadFuzzer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp
index 53f28047614..e0e67c8a67c 100644
--- a/src/Common/ThreadFuzzer.cpp
+++ b/src/Common/ThreadFuzzer.cpp
@@ -234,7 +234,7 @@ static void injectionImpl(
         && sleep_time_us_max > 0
         && std::bernoulli_distribution(sleep_probability)(thread_local_rng))
     {
-        sleepForNanoseconds((thread_local_rng() % static_cast<uint64_t>(sleep_time_us_max)) * 1000); /*may sleep(0)*/
+        sleepForNanoseconds((thread_local_rng() % static_cast<uint64_t>(sleep_time_us_max * 1000))); /*may sleep(0)*/
     }
 }

From 6254a4fe32967d763f727fb6bafa35e3b36c189b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Tue, 14 May 2024 16:55:19 +0200
Subject: [PATCH 210/651] Enforce some minimum sleep value

---
 src/Common/ThreadFuzzer.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp
index e0e67c8a67c..1a2d3b6ff48 100644
--- a/src/Common/ThreadFuzzer.cpp
+++ b/src/Common/ThreadFuzzer.cpp
@@ -153,7 +153,7 @@ bool ThreadFuzzer::isEffective() const
             return true; \
         if (NAME##_before_sleep_probability.load(std::memory_order_relaxed) > 0.0) \
             return true; \
-        if (NAME##_before_sleep_time_us_max.load(std::memory_order_relaxed) > 0.0) \
+        if (NAME##_before_sleep_time_us_max.load(std::memory_order_relaxed) > 0.001) \
            return true; \
 \
@@ -162,7 +162,7 @@ bool ThreadFuzzer::isEffective() const
             return true; \
         if (NAME##_after_sleep_probability.load(std::memory_order_relaxed) > 0.0) \
             return true; \
-        if (NAME##_after_sleep_time_us_max.load(std::memory_order_relaxed) > 0.0) \
+        if (NAME##_after_sleep_time_us_max.load(std::memory_order_relaxed) > 0.001) \
             return true;
 
     FOR_EACH_WRAPPED_FUNCTION(CHECK_WRAPPER_PARAMS)
 
 #    undef INIT_WRAPPER_PARAMS
 #endif
 
-    if (explicit_sleep_probability > 0 && sleep_time_us_max > 0)
+    if (explicit_sleep_probability > 0 && sleep_time_us_max > 0.001)
         return true;
 
@@ -230,11 +230,9 @@ static void injectionImpl(
     UNUSED(migrate_probability);
 #endif
 
-    if (sleep_probability > 0
-        && sleep_time_us_max > 0
-        && std::bernoulli_distribution(sleep_probability)(thread_local_rng))
+    if (sleep_probability > 0 && sleep_time_us_max > 0.001 && std::bernoulli_distribution(sleep_probability)(thread_local_rng))
     {
-        sleepForNanoseconds((thread_local_rng() % static_cast<uint64_t>(sleep_time_us_max * 1000))); /*may sleep(0)*/
+        sleepForNanoseconds((thread_local_rng() % static_cast<uint64_t>(sleep_time_us_max * 1000)));
     }
 }
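The two patches above fix an arithmetic trap in the sleep injection: sleep_time_us_max is a floating-point number of microseconds, so truncating it to an integer before the modulo can yield zero, and a modulo by zero is undefined behaviour. The first patch scales to nanoseconds before truncating; the second enforces a minimum of 0.001 so the truncated bound stays nonzero. A small sketch of the corrected computation, assuming an ordinary std::mt19937_64 in place of thread_local_rng:

#include <cstdint>
#include <random>

// Sketch only, not part of the patch. Callers are expected to skip the
// injection unless sleep_time_us_max > 0.001, which guarantees the
// truncated nanosecond bound below is at least 1 and the modulo is safe.
uint64_t pickSleepNanoseconds(std::mt19937_64 & rng, double sleep_time_us_max)
{
    // Broken variant for 0 < sleep_time_us_max < 1:
    //   (rng() % static_cast<uint64_t>(sleep_time_us_max)) * 1000
    // truncates the bound to 0 before the modulo.
    return rng() % static_cast<uint64_t>(sleep_time_us_max * 1000);
}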
"bad_settings_node", main_configs=["configs/config.xml"], user_configs=["configs/inconsistent_settings.xml"], with_zookeeper=True, From f931f086cd87d974bd3ff9ce0cb0a4643a89a88c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 14 May 2024 17:23:22 +0200 Subject: [PATCH 213/651] Fix possible crash with SYSTEM UNLOAD PRIMARY KEY --- src/Processors/QueryPlan/PartsSplitter.cpp | 15 +++- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 ++++- src/Storages/MergeTree/IMergeTreeDataPart.h | 5 +- .../MergeTree/MergedBlockOutputStream.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- .../03151_unload_index_race.reference | 0 .../0_stateless/03151_unload_index_race.sh | 75 +++++++++++++++++++ 7 files changed, 104 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/03151_unload_index_race.reference create mode 100755 tests/queries/0_stateless/03151_unload_index_race.sh diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 3e9e3f7ea11..6d70aa8a60d 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -128,15 +128,21 @@ class IndexAccess public: explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { - /// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`) - /// and we need to use the same set of index columns across all parts. + /// Indices might be reloaded during the process and the reload might produce a different value + /// (change in `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`). Also, some suffix of index + /// columns might not be loaded (same setting) so we keep a reference to the current indices and + /// track the minimal subset of loaded columns across all parts. + indices.reserve(parts.size()); for (const auto & part : parts) - loaded_columns = std::min(loaded_columns, part.data_part->getIndex()->size()); + indices.push_back(part.data_part->getIndex()); + + for (const auto & index : indices) + loaded_columns = std::min(loaded_columns, index->size()); } Values getValue(size_t part_idx, size_t mark) const { - const auto & index = parts[part_idx].data_part->getIndex(); + const auto & index = indices[part_idx]; chassert(index->size() >= loaded_columns); Values values(loaded_columns); for (size_t i = 0; i < loaded_columns; ++i) @@ -206,6 +212,7 @@ public: } private: const RangesInDataParts & parts; + std::vector indices; size_t loaded_columns = std::numeric_limits::max(); }; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 34bab967c7f..ca3f0de1dea 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -346,16 +346,25 @@ IMergeTreeDataPart::Index IMergeTreeDataPart::getIndex() const if (!index_loaded) loadIndex(); index_loaded = true; - return TSA_SUPPRESS_WARNING_FOR_READ(index); /// The variable is guaranteed to be unchanged after return. 
From f931f086cd87d974bd3ff9ce0cb0a4643a89a88c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Tue, 14 May 2024 17:23:22 +0200
Subject: [PATCH 213/651] Fix possible crash with SYSTEM UNLOAD PRIMARY KEY

---
 src/Processors/QueryPlan/PartsSplitter.cpp    | 15 +++-
 src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 ++++-
 src/Storages/MergeTree/IMergeTreeDataPart.h   |  5 +-
 .../MergeTree/MergedBlockOutputStream.cpp     |  2 +-
 src/Storages/MergeTree/MutateTask.cpp         |  2 +-
 .../03151_unload_index_race.reference         |  0
 .../0_stateless/03151_unload_index_race.sh    | 75 +++++++++++++++++++
 7 files changed, 104 insertions(+), 12 deletions(-)

diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp
index 3e9e3f7ea11..6d70aa8a60d 100644
--- a/src/Processors/QueryPlan/PartsSplitter.cpp
+++ b/src/Processors/QueryPlan/PartsSplitter.cpp
@@ -128,15 +128,21 @@ class IndexAccess
 public:
     explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_)
    {
-        /// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`)
-        /// and we need to use the same set of index columns across all parts.
+        /// Indices might be reloaded during the process and the reload might produce a different value
+        /// (change in `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`). Also, some suffix of index
+        /// columns might not be loaded (same setting) so we keep a reference to the current indices and
+        /// track the minimal subset of loaded columns across all parts.
+        indices.reserve(parts.size());
         for (const auto & part : parts)
-            loaded_columns = std::min(loaded_columns, part.data_part->getIndex()->size());
+            indices.push_back(part.data_part->getIndex());
+
+        for (const auto & index : indices)
+            loaded_columns = std::min(loaded_columns, index->size());
     }
 
     Values getValue(size_t part_idx, size_t mark) const
     {
-        const auto & index = parts[part_idx].data_part->getIndex();
+        const auto & index = indices[part_idx];
         chassert(index->size() >= loaded_columns);
         Values values(loaded_columns);
         for (size_t i = 0; i < loaded_columns; ++i)
@@ -206,6 +212,7 @@ public:
     }
 private:
     const RangesInDataParts & parts;
+    std::vector<IMergeTreeDataPart::Index> indices;
    size_t loaded_columns = std::numeric_limits<size_t>::max();
 };
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 34bab967c7f..ca3f0de1dea 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -346,16 +346,25 @@ IMergeTreeDataPart::Index IMergeTreeDataPart::getIndex() const
     if (!index_loaded)
         loadIndex();
     index_loaded = true;
-    return TSA_SUPPRESS_WARNING_FOR_READ(index); /// The variable is guaranteed to be unchanged after return.
+    return index;
 }
 
-void IMergeTreeDataPart::setIndex(Index index_)
+void IMergeTreeDataPart::setIndex(const Columns & cols_)
 {
     std::scoped_lock lock(index_mutex);
     if (!index->empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once");
-    index = index_;
+    index = std::make_shared<Columns>(cols_);
+    index_loaded = true;
+}
+
+void IMergeTreeDataPart::setIndex(Columns && cols_)
+{
+    std::scoped_lock lock(index_mutex);
+    if (!index->empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once");
+    index = std::make_shared<Columns>(std::move(cols_));
     index_loaded = true;
 }
@@ -913,7 +922,7 @@ void IMergeTreeDataPart::loadIndex() const
         if (!index_file->eof())
             throw Exception(ErrorCodes::EXPECTED_END_OF_FILE, "Index file {} is unexpectedly long", index_path);
 
-        index->assign(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end()));
+        index = std::make_shared<Columns>(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end()));
     }
 }
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index b23f5e419dc..219dec28af5 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -79,7 +79,7 @@ public:
     using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>;
     using NameToNumber = std::unordered_map<std::string, size_t>;
 
-    using Index = std::shared_ptr<Columns>;
+    using Index = std::shared_ptr<const Columns>;
     using IndexSizeByName = std::unordered_map<std::string, ColumnSize>;
 
     using Type = MergeTreeDataPartType;
@@ -368,7 +368,8 @@ public:
     int32_t metadata_version;
 
     Index getIndex() const;
-    void setIndex(Index index_);
+    void setIndex(const Columns & index_);
+    void setIndex(Columns && index_);
     void unloadIndex();
 
     /// For data in RAM ('index')
diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
index 9f641fd8eb5..d8555d69788 100644
--- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
@@ -181,7 +181,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(
     new_part->rows_count = rows_count;
     new_part->modification_time = time(nullptr);
-    new_part->setIndex(std::make_shared<Columns>(writer->releaseIndexColumns()));
+    new_part->setIndex(writer->releaseIndexColumns());
     new_part->checksums = checksums;
     new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk());
     new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk());
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp
index 7fc501fd13f..5934756fb95 100644
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@@ -935,7 +935,7 @@ void finalizeMutatedPart(
     new_data_part->rows_count = source_part->rows_count;
     new_data_part->index_granularity = source_part->index_granularity;
-    new_data_part->setIndex(source_part->getIndex());
+    new_data_part->setIndex(*source_part->getIndex());
     new_data_part->minmax_idx = source_part->minmax_idx;
     new_data_part->modification_time = time(nullptr);
diff --git a/tests/queries/0_stateless/03151_unload_index_race.reference b/tests/queries/0_stateless/03151_unload_index_race.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03151_unload_index_race.sh b/tests/queries/0_stateless/03151_unload_index_race.sh
new file mode 100755
index 00000000000..12198aaa174
--- /dev/null
+++ b/tests/queries/0_stateless/03151_unload_index_race.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest, long, no-parallel
+# Disable parallel since it creates 10 different threads querying and might overload the server
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_CLIENT -q "create table t(a UInt32, b UInt32, c UInt32) engine=MergeTree order by (a, b, c) settings index_granularity=1;"
+$CLICKHOUSE_CLIENT -q "system stop merges t;"
+
+# In this part a only changes 10% of the time, b 50% of the time, c all the time
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 10), intDiv(number, 2), number from numbers_mt(100);"
+
+# In this part a only changes 33% of the time, b 50% of the time, c 10% of the time
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t select intDiv(number, 3), intDiv(number, 2), intDiv(number, 3) from numbers_mt(100);"
+
+# In this part a changes 100% of the time
+$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t Select number, number, number from numbers_mt(100);"
+
+
+# In this part a never changes
+$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);"
+$CLICKHOUSE_CLIENT -q "insert into t Select 0, intDiv(number, 10), intDiv(number, 2) from numbers_mt(100);"
+
+function thread_alter_settings()
+{
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
+        $CLICKHOUSE_CLIENT -n --query "ALTER TABLE t MODIFY SETTING primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns=0.$RANDOM"
+        $CLICKHOUSE_CLIENT -n --query "SYSTEM UNLOAD PRIMARY KEY t"
+        sleep 0.0$RANDOM
+    done
+}
+
+function thread_query_table()
+{
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
+        COUNT=$($CLICKHOUSE_CLIENT -n --query "SELECT count() FROM t where not ignore(*);")
+        if [ "$COUNT" -ne "2000" ]; then
+            echo "$COUNT"
+        fi
+    done
+}
+
+export -f thread_alter_settings
+export -f thread_query_table
+
+TIMEOUT=10
+
+thread_alter_settings $TIMEOUT &
+for i in $(seq 1 10);
+do
+    thread_query_table $TIMEOUT &
+done
+
+wait
+
+$CLICKHOUSE_CLIENT -q "SELECT count() FROM t FORMAT Null"
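The crash fix above relies on snapshotting: IndexAccess copies each part's shared_ptr once in its constructor instead of dereferencing part.data_part->getIndex() on every call, so a concurrent SYSTEM UNLOAD PRIMARY KEY (which swaps the pointer inside the part) can neither free columns a reader is still using nor change the answer between two reads. A minimal self-contained sketch of the pattern, with a toy Part type standing in for IMergeTreeDataPart (illustrative, not the real classes):

#include <memory>
#include <mutex>
#include <vector>

// Sketch only, not part of the patch.
struct Part
{
    using Index = std::shared_ptr<const std::vector<int>>;

    Index getIndex() const
    {
        std::lock_guard lock(mutex);
        return index; /// copies the shared_ptr, not the data
    }

    void unloadIndex()
    {
        std::lock_guard lock(mutex);
        /// Readers holding the old shared_ptr keep that snapshot alive.
        index = std::make_shared<const std::vector<int>>();
    }

private:
    mutable std::mutex mutex;
    Index index = std::make_shared<const std::vector<int>>();
};

struct IndexAccess
{
    explicit IndexAccess(const std::vector<Part> & parts)
    {
        indices.reserve(parts.size());
        for (const auto & part : parts)
            indices.push_back(part.getIndex()); /// snapshot once, reuse for every lookup
    }

    std::vector<Part::Index> indices;
};

This is also why the accompanying test alternates SYSTEM UNLOAD PRIMARY KEY with concurrent SELECTs: without the snapshot, a reload between two lookups could hand the splitter two different index versions.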
"$COUNT" -ne "2000" ]; then + echo "$COUNT" + fi + done +} + +export -f thread_alter_settings +export -f thread_query_table + +TIMEOUT=10 + +thread_alter_settings $TIMEOUT & +for i in $(seq 1 10); +do + thread_query_table $TIMEOUT & +done + +wait + +$CLICKHOUSE_CLIENT -q "SELECT count() FROM t FORMAT Null" From 7de812000c7453b664a4bb6d02f5c3017b00ffad Mon Sep 17 00:00:00 2001 From: tomershafir Date: Tue, 14 May 2024 18:50:51 +0300 Subject: [PATCH 214/651] io_uring: add basic io_uring clickhouse perf test --- tests/performance/io_uring.xml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/performance/io_uring.xml diff --git a/tests/performance/io_uring.xml b/tests/performance/io_uring.xml new file mode 100644 index 00000000000..59674894aae --- /dev/null +++ b/tests/performance/io_uring.xml @@ -0,0 +1,13 @@ + + + io_uring + + + CREATE TABLE hits_none (WatchID UInt64 CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() + INSERT INTO hits_none SELECT WatchID FROM test.hits + OPTIMIZE TABLE hits_none FINAL + + + + DROP TABLE hits_none + From 937b7323cb4505185fb0cd83e35fc5c85356d616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 14 May 2024 18:09:28 +0200 Subject: [PATCH 215/651] Shellcheck --- tests/queries/0_stateless/03151_unload_index_race.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03151_unload_index_race.sh b/tests/queries/0_stateless/03151_unload_index_race.sh index 12198aaa174..7e9dfa7cddc 100755 --- a/tests/queries/0_stateless/03151_unload_index_race.sh +++ b/tests/queries/0_stateless/03151_unload_index_race.sh @@ -65,7 +65,7 @@ export -f thread_query_table TIMEOUT=10 thread_alter_settings $TIMEOUT & -for i in $(seq 1 10); +for _ in $(seq 1 10); do thread_query_table $TIMEOUT & done From 0abb2be5eb55183e83c218cf352c88c7fb497939 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 14 May 2024 18:40:09 +0200 Subject: [PATCH 216/651] Review fixes --- docs/en/operations/settings/settings.md | 50 +++++++++++++++++++ .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 7 ++- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 3 ++ src/Storages/Cache/SchemaCache.cpp | 1 - .../ObjectStorage/StorageObjectStorage.cpp | 6 +-- .../ObjectStorage/StorageObjectStorage.h | 6 ++- .../StorageObjectStorageSource.cpp | 2 +- .../StorageObjectStorageSource.h | 2 +- src/Storages/ObjectStorage/Utils.cpp | 6 +-- .../registerStorageObjectStorage.cpp | 6 +-- 10 files changed, 70 insertions(+), 19 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 91b544c6a82..72bd1ca8e2c 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3665,6 +3665,16 @@ Possible values: Default value: `0`. +## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist} + +Ignore ansense of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + ## hdfs_truncate_on_insert {#hdfs_truncate_on_insert} Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. @@ -3697,6 +3707,46 @@ Possible values: Default value: `0`. +## hdfs_throw_on_zero_files_match {#hdfs_throw_on_zero_files_match} + +Throw an error if matched zero files according to glob expansion rules. + +Possible values: +- 1 — `SELECT` throws an exception. 
+- 0 — `SELECT` returns empty result. + +Default value: `0`. + +## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist} + +Ignore ansense of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + +## azure_throw_on_zero_files_match {#azure_throw_on_zero_files_match} + +Throw an error if matched zero files according to glob expansion rules. + +Possible values: +- 1 — `SELECT` throws an exception. +- 0 — `SELECT` returns empty result. + +Default value: `0`. + +## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist} + +Ignore ansense of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + ## engine_url_skip_empty_files {#engine_url_skip_empty_files} Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables. diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 6c2f310a7d1..1f3a4bdf6c7 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -186,7 +186,6 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { initializeHDFSFS(); - auto * log = &Poco::Logger::get("HDFSObjectStorage"); LOG_TEST(log, "Trying to list files for {}", path); HDFSFileInfo ls; @@ -210,9 +209,6 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM for (int i = 0; i < ls.length; ++i) { const String file_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = file_path.rfind('/'); - const String file_name = file_path.substr(last_slash); - const bool is_directory = ls.file_info[i].mKind == 'D'; if (is_directory) { @@ -227,6 +223,9 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod), {}})); } + + if (children.size() >= max_keys) + break; } } diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index e747b283400..8aae90d0721 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -39,6 +39,7 @@ public: bool lazy_initialize) : config(config_) , settings(std::move(settings_)) + , log(getLogger("HDFSObjectStorage(" + hdfs_root_path_ + ")")) { const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2); url = hdfs_root_path_; @@ -134,6 +135,8 @@ private: std::string url; std::string url_without_path; std::string data_directory; + + LoggerPtr log; }; } diff --git a/src/Storages/Cache/SchemaCache.cpp b/src/Storages/Cache/SchemaCache.cpp index 5dc39f04ae0..299dd292772 100644 --- a/src/Storages/Cache/SchemaCache.cpp +++ b/src/Storages/Cache/SchemaCache.cpp @@ -1,6 +1,5 @@ #include #include -#include #include namespace ProfileEvents diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 01790760747..c5affb7989f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -61,10 +61,6 @@ 
StorageObjectStorage::StorageObjectStorage(
     metadata.setConstraints(constraints_);
     metadata.setComment(comment);
 
-    StoredObjects objects;
-    for (const auto & key : configuration->getPaths())
-        objects.emplace_back(key);
-
     setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns()));
     setInMemoryMetadata(metadata);
 }
@@ -93,7 +89,7 @@ void StorageObjectStorage::updateConfiguration(ContextPtr context)
 {
     /// FIXME: we should be able to update everything apart from client if static_configuration == true.
     if (!configuration->isStaticConfiguration())
-        object_storage->applyNewSettings(context->getConfigRef(), "s3.", context);
+        object_storage->applyNewSettings(context->getConfigRef(), configuration->getTypeName() + ".", context);
 }
 
 namespace
diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h
index a396bad9d6e..928d49f9604 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.h
@@ -124,7 +124,6 @@ protected:
     ConfigurationPtr configuration;
     const ObjectStoragePtr object_storage;
-    const std::string engine_name;
     const std::optional format_settings;
     const ASTPtr partition_by;
     const bool distributed_processing;
@@ -148,7 +147,9 @@ public:
         ContextPtr local_context,
         bool with_table_structure);
 
+    /// Storage type: s3, hdfs, azure.
     virtual std::string getTypeName() const = 0;
+    /// Engine name: S3, HDFS, Azure.
     virtual std::string getEngineName() const = 0;
 
     virtual Path getPath() const = 0;
@@ -158,7 +159,10 @@ public:
     virtual void setPaths(const Paths & paths) = 0;
 
     virtual String getDataSourceDescription() = 0;
+    /// Sometimes object storages have something similar to chroot or namespace, for example
+    /// buckets in S3. If the object storage doesn't have any namespaces, return an empty string.
     virtual String getNamespace() const = 0;
+
     virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0;
     virtual void addStructureAndFormatToArgs(
         ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0;
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
index cb3f732ce83..e28924617e0 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
@@ -321,7 +321,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S
     const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size;
     const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool;
     read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read;
-    /// User's S3 object may change, don't cache it.
+    /// User's object may change, don't cache it.
     read_settings.use_page_cache_for_disks_without_file_cache = false;
 
     // Create a read buffer that will prefetch the first ~1 MB of the file.
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index a8df00bc0ac..08d545f9b85 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -62,7 +62,7 @@ protected: const std::optional format_settings; const UInt64 max_block_size; const bool need_only_count; - const ReadFromFormatInfo read_from_format_info; + const ReadFromFormatInfo & read_from_format_info; const std::shared_ptr create_reader_pool; ColumnsDescription columns_desc; diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index bde3cb7e1cb..e49e14d2a0c 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -38,9 +38,9 @@ std::optional checkAndGetNewFileOnInsertIfNeeded( throw Exception( ErrorCodes::BAD_ARGUMENTS, "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - configuration.getNamespace(), key); + "If you want to overwrite it, enable setting {}_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting {}_create_new_file_on_insert", + configuration.getNamespace(), key, configuration.getTypeName(), configuration.getTypeName()); } void resolveSchemaAndFormat( diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index 74c8aeaad7d..bf595b2f5d4 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -106,17 +106,17 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) void registerStorageS3(StorageFactory & factory) { - return registerStorageS3Impl("S3", factory); + registerStorageS3Impl("S3", factory); } void registerStorageCOS(StorageFactory & factory) { - return registerStorageS3Impl("COSN", factory); + registerStorageS3Impl("COSN", factory); } void registerStorageOSS(StorageFactory & factory) { - return registerStorageS3Impl("OSS", factory); + registerStorageS3Impl("OSS", factory); } #endif From 3778cee49e1d6ac1f0f4f470ba5d63458c33df3b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 14 May 2024 18:41:19 +0200 Subject: [PATCH 217/651] Update src/Core/Settings.h Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Core/Settings.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index aa20f68ac0d..066a551b37b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -117,9 +117,9 @@ class IColumn; M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ - M(Bool, s3_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageS3", 0) \ - M(Bool, hdfs_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS", 0) \ - M(Bool, azure_ignore_file_doesnt_exist, false, "Ignore if files does not exits and 
return 0 zeros for StorageAzure", 0) \
+    M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in S3 table engine", 0) \
+    M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in HDFS table engine", 0) \
+    M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \
     M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \
     M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
     M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \

From be693ceba7fa17e2c03c54197fb0d0f301640cc1 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Tue, 14 May 2024 18:46:35 +0200
Subject: [PATCH 218/651] Minor

---
 src/Storages/ObjectStorage/StorageObjectStorage.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
index c5affb7989f..bc5b347d1e0 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
@@ -402,7 +402,6 @@ void StorageObjectStorage::Configuration::initialize(
     else
         configuration.fromAST(engine_args, local_context, with_table_structure);
 
-    // FIXME: it should be - if (format == "auto" && get_format_from_file)
     if (configuration.format == "auto")
         configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto");
     else

From b27c5295488cbc6bb79c096577ffc7f743172cab Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 14 May 2024 17:51:21 +0000
Subject: [PATCH 219/651] Fix a query with a duplicated cyclic alias.
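Resolving nodes with duplicated aliases could substitute an identifier from the expression cache even when the alias cycles back to itself, so the duplicated nodes are now cloned and their alias is marked as non-cached during resolution. A minimal reproducer (taken from the regression test added in this patch; any query that redefines an expression under its own alias in GROUP BY hits the same path):

    select number % 2 as number, count()
    from numbers(10)
    where number != 0
    group by number % 2 as number;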
---
 src/Analyzer/Passes/QueryAnalysisPass.cpp      | 12 ++++++++----
 .../02896_cyclic_aliases_crash.reference       |  1 +
 .../0_stateless/02896_cyclic_aliases_crash.sql | 15 +++++++++++++++
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index e20ff4ab909..0d2cd5c5537 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -1024,21 +1024,21 @@ private:
         if (is_lambda_node)
         {
             if (scope.alias_name_to_expression_node.contains(alias))
-                scope.nodes_with_duplicated_aliases.insert(node);
+                scope.nodes_with_duplicated_aliases.insert(node->clone());
 
             auto [_, inserted] = scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node));
             if (!inserted)
-                scope.nodes_with_duplicated_aliases.insert(node);
+                scope.nodes_with_duplicated_aliases.insert(node->clone());
 
             return;
         }
 
         if (scope.alias_name_to_lambda_node.contains(alias))
-            scope.nodes_with_duplicated_aliases.insert(node);
+            scope.nodes_with_duplicated_aliases.insert(node->clone());
 
         auto [_, inserted] = scope.alias_name_to_expression_node.insert(std::make_pair(alias, node));
         if (!inserted)
-            scope.nodes_with_duplicated_aliases.insert(node);
+            scope.nodes_with_duplicated_aliases.insert(node->clone());
 
         /// If node is identifier put it also in scope alias name to lambda node map
         if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER)
@@ -8154,6 +8154,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
     {
         auto node = node_with_duplicated_alias;
         auto node_alias = node->getAlias();
+
+        /// Add the current alias to the non-cached set, because in case of a cyclic alias the identifier should not be substituted from the cache.
+        /// See 02896_cyclic_aliases_crash.
+ scope.non_cached_identifier_lookups_during_expression_resolve.insert({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); resolveExpressionNode(node, scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/); bool has_node_in_alias_table = false; diff --git a/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference b/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference index d43017edcc5..caf11f5c15a 100644 --- a/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference +++ b/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference @@ -1 +1,2 @@ 1 2 3 +1 5 diff --git a/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql b/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql index 76eff95cf31..5fb628eeb67 100644 --- a/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql +++ b/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql @@ -15,3 +15,18 @@ SELECT val + prev as val2 FROM ( SELECT 1 as val ) ; + +select number % 2 as number, count() from numbers(10) where number != 0 group by number % 2 as number; + +CREATE TABLE test_table (time_stamp_utc DateTime, impressions UInt32, clicks UInt32, revenue Float32) ENGINE = MergeTree ORDER BY time_stamp_utc; + +SELECT + toStartOfDay(toDateTime(time_stamp_utc)) AS time_stamp_utc, + sum(impressions) AS Impressions, + sum(clicks) AS Clicks, + sum(revenue) AS Revenue +FROM test_table +WHERE (time_stamp_utc >= toDateTime('2024-04-25 00:00:00')) AND (time_stamp_utc < toDateTime('2024-05-02 00:00:00')) +GROUP BY time_stamp_utc +ORDER BY Impressions DESC +LIMIT 1000; From 65f404c153fb96602ec07c4f3919af14468b8d7d Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 14 May 2024 21:28:40 +0200 Subject: [PATCH 220/651] Review fixes --- docs/en/operations/settings/settings.md | 2 +- src/Core/Settings.h | 6 +++--- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/S3/Configuration.h | 2 ++ .../ObjectStorage/StorageObjectStorage.h | 5 +++-- .../StorageObjectStorageSource.cpp | 19 ++++++++----------- .../StorageObjectStorageSource.h | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 72bd1ca8e2c..88e945a710c 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3667,7 +3667,7 @@ Default value: `0`. ## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist} -Ignore ansense of file if it does not exist when reading certain keys. +Ignore absense of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 066a551b37b..afadaa88f6d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -117,9 +117,9 @@ class IColumn; M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ - M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in S3 table engine", 0) \ - M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ - M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ + M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ + M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \
     M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
index 1f3a4bdf6c7..dcb2af9d4d3 100644
--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
@@ -224,7 +224,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM
                 {}}));
         }
 
-        if (children.size() >= max_keys)
+        if (max_keys && children.size() >= max_keys)
             break;
     }
 }
diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h
index b28b1c226a7..0bd7f1ab108 100644
--- a/src/Storages/ObjectStorage/S3/Configuration.h
+++ b/src/Storages/ObjectStorage/S3/Configuration.h
@@ -15,12 +15,14 @@ public:
     using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
 
     static constexpr auto type_name = "s3";
+    static constexpr auto namespace_name = "bucket";
 
     StorageS3Configuration() = default;
     StorageS3Configuration(const StorageS3Configuration & other);
 
     std::string getTypeName() const override { return type_name; }
     std::string getEngineName() const override { return url.storage_name; }
+    std::string getNamespaceType() const override { return namespace_name; }
 
     Path getPath() const override { return url.key; }
     void setPath(const Path & path) override { url.key = path; }
diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h
index 928d49f9604..26b153ca0db 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.h
@@ -151,6 +151,9 @@ public:
     virtual std::string getTypeName() const = 0;
     /// Engine name: S3, HDFS, Azure.
     virtual std::string getEngineName() const = 0;
+    /// Sometimes object storages have something similar to chroot or namespace, for example
+    /// buckets in S3. If the object storage doesn't have any namespaces, return an empty string.
+    virtual std::string getNamespaceType() const { return "namespace"; }
 
     virtual Path getPath() const = 0;
     virtual void setPath(const Path & path) = 0;
@@ -159,8 +162,6 @@ public:
     virtual void setPaths(const Paths & paths) = 0;
 
     virtual String getDataSourceDescription() = 0;
-    /// Sometimes object storages have something similar to chroot or namespace, for example
-    /// buckets in S3. If the object storage doesn't have any namespaces, return an empty string.
virtual String getNamespace() const = 0; virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index e28924617e0..737f733615f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -43,7 +43,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, const ReadFromFormatInfo & info, - std::optional format_settings_, + const std::optional & format_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, @@ -95,7 +95,8 @@ std::shared_ptr StorageObjectStorageSourc local_context->getSettingsRef().max_threads); if (configuration->isNamespaceWithGlobs()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expression can not have wildcards inside {} name", configuration->getNamespaceType()); auto settings = configuration->getQuerySettings(local_context); @@ -425,15 +426,13 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne { std::lock_guard lock(next_mutex); auto object_info = nextImplUnlocked(processor); - if (object_info) + if (first_iteration && !object_info && throw_on_zero_files_match) { - if (first_iteration) - first_iteration = false; - } - else if (first_iteration && throw_on_zero_files_match) - { - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files"); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, + "Can not match any files with path {}", + configuration->getPath()); } + first_iteration = false; return object_info; } @@ -456,8 +455,6 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne } new_batch = std::move(result.value()); - LOG_TEST(logger, "Batch size: {}", new_batch.size()); - for (auto it = new_batch.begin(); it != new_batch.end();) { if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 08d545f9b85..9c67a125f5e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -31,7 +31,7 @@ public: ObjectStoragePtr object_storage_, ConfigurationPtr configuration, const ReadFromFormatInfo & info, - std::optional format_settings_, + const std::optional & format_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, From 0202bc1c7550f627bdc8013527804b0ec65e395b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 14 May 2024 18:05:00 +0000 Subject: [PATCH 221/651] Skip the added tests in sanitizers where trace_log is disabled. 
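trace_log is disabled in builds with sanitizers, so the table stays empty and the count assertions in these tests fail. The integration test now checks node.is_built_with_sanitizer() and skips itself, and the stateless test is tagged no-asan/no-tsan/no-msan/no-ubsan. The behaviour the stateless test exercises looks roughly like this (a sketch; the exact settings are in 03150_trace_log_add_build_id.sql below):

    SET query_profiler_real_time_period_ns = 100000000;
    SELECT sleep(1);
    SYSTEM FLUSH LOGS;
    SELECT COUNT(*) > 1 FROM system.trace_log WHERE build_id IS NOT NULL;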
--- tests/integration/test_trace_log_build_id/test.py | 7 ++++++- tests/queries/0_stateless/03150_trace_log_add_build_id.sql | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_trace_log_build_id/test.py b/tests/integration/test_trace_log_build_id/test.py index 84392ab12b1..fb4316e91bf 100644 --- a/tests/integration/test_trace_log_build_id/test.py +++ b/tests/integration/test_trace_log_build_id/test.py @@ -37,10 +37,15 @@ def test_trace_log_build_id(started_cluster): # We make queries to create entries in trace_log, then restart with new version and verify if the old # trace_log table is renamed and a new trace_log table is created. + if node.is_built_with_sanitizer(): + pytest.skip( + "Sanitizers are skipped, because trace_log is disabled with sanitizers." + ) + query_for_table_name = "EXISTS TABLE system.{table}" node.query( - "SELECT sleep(1)", + "SELECT sleep(2)", query_id=OLD_TEST_QUERY_ID, ) node.query("SYSTEM FLUSH LOGS") diff --git a/tests/queries/0_stateless/03150_trace_log_add_build_id.sql b/tests/queries/0_stateless/03150_trace_log_add_build_id.sql index 1f7bf1c02de..75122de47b5 100644 --- a/tests/queries/0_stateless/03150_trace_log_add_build_id.sql +++ b/tests/queries/0_stateless/03150_trace_log_add_build_id.sql @@ -1,5 +1,9 @@ -SELECT sleep(1); +-- Tags: no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage +SET log_queries = 1; +SET log_query_threads = 1; +SET query_profiler_real_time_period_ns = 100000000; +SELECT sleep(1); SYSTEM FLUSH LOGS; SELECT COUNT(*) > 1 FROM system.trace_log WHERE build_id IS NOT NULL; From 54e9ffd0bf7d9706aabd3690f95ff81bcd5f8eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9A=D0=B8=D1=80=D0=B8=D0=BB=D0=BB=20=D0=93=D0=B0=D1=80?= =?UTF-8?q?=D0=B1=D0=B0=D1=80?= Date: Wed, 15 May 2024 02:31:19 +0300 Subject: [PATCH 222/651] Move test to new file --- .../test_unusual_path.py | 92 +++++++++++++++++++ .../test_zk_path.py | 74 +-------------- 2 files changed, 96 insertions(+), 70 deletions(-) create mode 100644 tests/integration/test_modify_engine_on_restart/test_unusual_path.py diff --git a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py b/tests/integration/test_modify_engine_on_restart/test_unusual_path.py new file mode 100644 index 00000000000..20d2c29257b --- /dev/null +++ b/tests/integration/test_modify_engine_on_restart/test_unusual_path.py @@ -0,0 +1,92 @@ +import pytest +from test_modify_engine_on_restart.common import check_flags_deleted, set_convert_flags +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +ch1 = cluster.add_instance( + "ch1", + main_configs=[ + "configs/config.d/clusters_zk_path.xml", + "configs/config.d/distributed_ddl.xml", + ], + with_zookeeper=True, + macros={"replica": "node1"}, + stay_alive=True, +) + +database_name = "modify_engine_unusual_path" + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def q(node, query): + return node.query(database=database_name, sql=query) + + +def create_tables(): + # Check one argument + q( + ch1, + "CREATE TABLE replacing_ver ( A Int64, D Date, S String ) ENGINE = ReplacingMergeTree(D) PARTITION BY toYYYYMM(D) ORDER BY A", + ) + + # Check more than one argument + q( + ch1, + "CREATE TABLE collapsing_ver ( ID UInt64, Sign Int8, Version UInt8 ) ENGINE = VersionedCollapsingMergeTree(Sign, Version) ORDER BY ID", + ) + + +def check_tables(): + # Check tables exists + assert ( + q( + ch1, + "SHOW 
TABLES", + ).strip() + == "collapsing_ver\nreplacing_ver" + ) + + # Check engines + assert ( + q( + ch1, + f"SELECT engine_full FROM system.tables WHERE database = '{database_name}' and name = 'replacing_ver'", + ) + .strip() + .startswith( + "ReplicatedReplacingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', D)" + ) + ) + assert ( + q( + ch1, + f"SELECT engine_full FROM system.tables WHERE database = '{database_name}' and name = 'collapsing_ver'", + ) + .strip() + .startswith( + "ReplicatedVersionedCollapsingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', Sign, Version)" + ) + ) + + +def test_modify_engine_on_restart_with_unusual_path(started_cluster): + ch1.query("CREATE DATABASE " + database_name) + + create_tables() + + set_convert_flags(ch1, database_name, ["replacing_ver", "collapsing_ver"]) + + ch1.restart_clickhouse() + + check_flags_deleted(ch1, database_name, ["replacing_ver", "collapsing_ver"]) + + check_tables() diff --git a/tests/integration/test_modify_engine_on_restart/test_zk_path.py b/tests/integration/test_modify_engine_on_restart/test_zk_path.py index 8bbfe64240f..dd633ad0810 100644 --- a/tests/integration/test_modify_engine_on_restart/test_zk_path.py +++ b/tests/integration/test_modify_engine_on_restart/test_zk_path.py @@ -1,6 +1,5 @@ import pytest from test_modify_engine_on_restart.common import ( - check_flags_deleted, get_table_path, set_convert_flags, ) @@ -18,7 +17,7 @@ ch1 = cluster.add_instance( stay_alive=True, ) -database_name = "modify_engine_unusual_path" +database_name = "modify_engine_zk_path" @pytest.fixture(scope="module") @@ -31,90 +30,25 @@ def started_cluster(): cluster.shutdown() -def q(node, query, database=database_name): - return node.query(database=database, sql=query) - - -def create_tables(): - # Check one argument - q( - ch1, - "CREATE TABLE replacing_ver ( A Int64, D Date, S String ) ENGINE = ReplacingMergeTree(D) PARTITION BY toYYYYMM(D) ORDER BY A", - ) - - # Check more than one argument - q( - ch1, - "CREATE TABLE collapsing_ver ( ID UInt64, Sign Int8, Version UInt8 ) ENGINE = VersionedCollapsingMergeTree(Sign, Version) ORDER BY ID", - ) - - -def check_tables(): - # Check tables exists - assert ( - q( - ch1, - "SHOW TABLES", - ).strip() - == "collapsing_ver\nreplacing_ver" - ) - - # Check engines - assert ( - q( - ch1, - f"SELECT engine_full FROM system.tables WHERE database = '{database_name}' and name = 'replacing_ver'", - ) - .strip() - .startswith( - "ReplicatedReplacingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', D)" - ) - ) - assert ( - q( - ch1, - f"SELECT engine_full FROM system.tables WHERE database = '{database_name}' and name = 'collapsing_ver'", - ) - .strip() - .startswith( - "ReplicatedVersionedCollapsingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', Sign, Version)" - ) - ) - - -def test_modify_engine_on_restart_with_unusual_path(started_cluster): - ch1.query("CREATE DATABASE " + database_name) - - create_tables() - - set_convert_flags(ch1, database_name, ["replacing_ver", "collapsing_ver"]) - - ch1.restart_clickhouse() - - check_flags_deleted(ch1, database_name, ["replacing_ver", "collapsing_ver"]) - - check_tables() +def q(node, query): + return node.query(database=database_name, sql=query) def test_modify_engine_fails_if_zk_path_exists(started_cluster): - database_name = "zk_path" - ch1.query("CREATE DATABASE " + database_name + " ON CLUSTER cluster") + ch1.query("CREATE DATABASE " + 
database_name) q( ch1, "CREATE TABLE already_exists_1 ( A Int64, D Date, S String ) ENGINE MergeTree() PARTITION BY toYYYYMM(D) ORDER BY A;", - database_name, ) uuid = q( ch1, f"SELECT uuid FROM system.tables WHERE table = 'already_exists_1' and database = '{database_name}'", - database_name, ).strip("'[]\n") q( ch1, f"CREATE TABLE already_exists_2 ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/\\'/{database_name}/already_exists_1/{uuid}', 'r2') PARTITION BY toYYYYMM(D) ORDER BY A;", - database_name, ) set_convert_flags(ch1, database_name, ["already_exists_1"]) From 4701431ee89192444f9f08555617d9960af409f8 Mon Sep 17 00:00:00 2001 From: zzyReal666 Date: Wed, 15 May 2024 14:31:47 +0800 Subject: [PATCH 223/651] fix antlr insertStmt --- utils/antlr/ClickHouseParser.g4 | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/utils/antlr/ClickHouseParser.g4 b/utils/antlr/ClickHouseParser.g4 index 13194a8c2d2..30e1cd10f8f 100644 --- a/utils/antlr/ClickHouseParser.g4 +++ b/utils/antlr/ClickHouseParser.g4 @@ -218,9 +218,17 @@ insertStmt: INSERT INTO TABLE? (tableIdentifier | FUNCTION tableFunctionExpr) co columnsClause: LPAREN nestedIdentifier (COMMA nestedIdentifier)* RPAREN; dataClause - : FORMAT identifier # DataClauseFormat - | VALUES # DataClauseValues - | selectUnionStmt SEMICOLON? EOF # DataClauseSelect + : FORMAT identifier # DataClauseFormat + | VALUES assignmentValues (COMMA assignmentValues)* # DataClauseValues + | selectUnionStmt SEMICOLON? EOF # DataClauseSelect + ; + +assignmentValues + : LPAREN assignmentValue (COMMA assignmentValue)* RPAREN + | LPAREN RPAREN + ; +assignmentValue + : literal ; // KILL statement From bdca4c73fc8a3501bb09c28d35c80376751445c5 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 14 May 2024 18:05:00 +0000 Subject: [PATCH 224/651] Skip the added tests in sanitizers where trace_log is disabled, and add a new sanitizer_check_node to facilitate checking sanitizer config existence. --- tests/integration/test_trace_log_build_id/test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_trace_log_build_id/test.py b/tests/integration/test_trace_log_build_id/test.py index fb4316e91bf..8d654aef342 100644 --- a/tests/integration/test_trace_log_build_id/test.py +++ b/tests/integration/test_trace_log_build_id/test.py @@ -16,6 +16,7 @@ node = cluster.add_instance( stay_alive=True, with_installed_binary=True, ) +sanitizer_check_node = cluster.add_instance("sanitizer_check_node") @pytest.fixture(scope="module") @@ -37,7 +38,7 @@ def test_trace_log_build_id(started_cluster): # We make queries to create entries in trace_log, then restart with new version and verify if the old # trace_log table is renamed and a new trace_log table is created. - if node.is_built_with_sanitizer(): + if sanitizer_check_node.is_built_with_sanitizer(): pytest.skip( "Sanitizers are skipped, because trace_log is disabled with sanitizers." 
) @@ -67,7 +68,7 @@ def test_trace_log_build_id(started_cluster): ) """ node.query( - "SELECT sleep(1)", + "SELECT sleep(2)", query_id=NEW_TEST_QUERY_ID, ) node.query("SYSTEM FLUSH LOGS") From 13fc7c7cf81c806f5993ca43c4114e4073eb2821 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 15 May 2024 10:35:10 +0200 Subject: [PATCH 225/651] Don't allow 0 for max_block_size --- src/Core/SettingsQuirks.cpp | 16 ++++++++++++---- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 16 ++++------------ .../03149_numbers_max_block_size_zero.reference | 3 +-- .../03149_numbers_max_block_size_zero.sh | 7 +++++++ .../03149_numbers_max_block_size_zero.sql | 2 -- 5 files changed, 24 insertions(+), 20 deletions(-) create mode 100755 tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh delete mode 100644 tests/queries/0_stateless/03149_numbers_max_block_size_zero.sql diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 5e7d02dc448..73a0e2a9a6c 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -92,7 +92,7 @@ void applySettingsQuirks(Settings & settings, LoggerPtr log) void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) { - auto getCurrentValue = [¤t_settings](const std::string_view name) -> Field + auto get_current_value = [¤t_settings](const std::string_view name) -> Field { Field current_value; bool has_current_value = current_settings.tryGet(name, current_value); @@ -100,7 +100,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) return current_value; }; - UInt64 max_threads = getCurrentValue("max_threads").get(); + UInt64 max_threads = get_current_value("max_threads").get(); UInt64 max_threads_max_value = 256 * getNumberOfPhysicalCPUCores(); if (max_threads > max_threads_max_value) { @@ -109,7 +109,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) current_settings.set("max_threads", max_threads_max_value); } - constexpr UInt64 max_sane_block_rows_size = 4294967296; // 2^32 + static constexpr UInt64 max_sane_block_rows_size = 4294967296; // 2^32 std::unordered_set block_rows_settings{ "max_block_size", "max_insert_block_size", @@ -120,7 +120,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) "input_format_parquet_max_block_size"}; for (auto const & setting : block_rows_settings) { - auto block_size = getCurrentValue(setting).get(); + auto block_size = get_current_value(setting).get(); if (block_size > max_sane_block_rows_size) { if (log) @@ -128,5 +128,13 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) current_settings.set(setting, max_sane_block_rows_size); } } + + if (auto max_block_size = get_current_value("max_block_size").get(); max_block_size == 0) + { + if (log) + LOG_WARNING(log, "Sanity check: 'max_block_size' cannot be 0. Set to default value {}", DEFAULT_BLOCK_SIZE); + current_settings.set("max_block_size", DEFAULT_BLOCK_SIZE); + } } + } diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 759dc7354df..11371578c79 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -176,9 +176,8 @@ protected: { std::lock_guard lock(ranges_state->mutex); - UInt64 need = base_block_size_; - bool without_block_size_limit = need == 0; + UInt64 need = base_block_size_; UInt64 size = 0; /// how many item found. 
/// find start @@ -186,21 +185,14 @@ protected: end = start; /// find end - while (without_block_size_limit || need != 0) + while (need != 0) { UInt128 can_provide = end.offset_in_ranges == ranges.size() ? static_cast(0) : ranges[end.offset_in_ranges].size - end.offset_in_range; - if (can_provide == 0) break; - if (without_block_size_limit) - { - end.offset_in_ranges++; - end.offset_in_range = 0; - size += static_cast(can_provide); - } - else if (can_provide > need) + if (can_provide > need) { end.offset_in_range += need; size += need; @@ -535,7 +527,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() checkLimits(size_t(total_size)); - if (max_block_size != 0 && total_size / max_block_size < num_streams) + if (total_size / max_block_size < num_streams) num_streams = static_cast(total_size / max_block_size); if (num_streams == 0) diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference index 896f02d1185..d86bac9de59 100644 --- a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference +++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.reference @@ -1,2 +1 @@ -1320 -1320 +OK diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh new file mode 100755 index 00000000000..6f70a0d2536 --- /dev/null +++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0" 2>&1 | grep -q "Sanity check: 'max_block_size' cannot be 0. Set to default value" && echo "OK" || echo "FAIL" diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sql b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sql deleted file mode 100644 index afc4e4d57a5..00000000000 --- a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0; -SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 1; From 6158332d9104fb8d790bf1ec23df416d640550c9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 15 May 2024 11:12:28 +0200 Subject: [PATCH 226/651] Fix race in ReplicatedMergeTreeLogEntryData --- .../MergeTree/ReplicatedMergeTreeLogEntry.h | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 7693f34cc1e..7011794e16d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -9,7 +9,6 @@ #include #include -#include #include @@ -174,7 +173,36 @@ struct ReplicatedMergeTreeLogEntryData size_t quorum = 0; /// Used only in tests for permanent fault injection for particular queue entry. 
- bool fault_injected = false; + struct CopyableAtomicFlag + { + CopyableAtomicFlag() = default; + + CopyableAtomicFlag(const CopyableAtomicFlag & other) + : value(other.value.load()) + {} + + explicit CopyableAtomicFlag(bool value_) + : value(value_) + {} + + CopyableAtomicFlag & operator=(const CopyableAtomicFlag & other) + { + value = other.value.load(); + return *this; + } + + CopyableAtomicFlag & operator=(bool value_) + { + value = value_; + return *this; + } + + explicit operator bool() const { return value; } + + std::atomic value = false; + }; + + CopyableAtomicFlag fault_injected; /// If this MUTATE_PART entry caused by alter(modify/drop) query. bool isAlterMutation() const From a7b135ea8b8962ec4db318305391881ec1ff4ff8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 May 2024 12:42:38 +0200 Subject: [PATCH 227/651] Fix style check --- docs/en/operations/settings/settings.md | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 88e945a710c..131948eace9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3667,7 +3667,7 @@ Default value: `0`. ## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist} -Ignore absense of file if it does not exist when reading certain keys. +Ignore absence of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 996f7da234a..3c72ef0f737 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -253,6 +253,7 @@ DockerHub DoubleDelta Doxygen Durre +doesnt ECMA Ecto EdgeAngle From 4c8bdad0e709b64ed045aed6092a429767370395 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 May 2024 12:54:59 +0200 Subject: [PATCH 228/651] Simplify glob iterator --- .../ObjectStorage/StorageObjectStorageCluster.cpp | 8 +++----- .../ObjectStorage/StorageObjectStorageSource.cpp | 15 +++------------ 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 193894a1d44..a43d9da0fa3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -82,11 +82,9 @@ void StorageObjectStorageCluster::updateQueryToSendIfNeeded( RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { - const auto settings = configuration->getQuerySettings(local_context); - auto iterator = std::make_shared( - object_storage, configuration, predicate, virtual_columns, local_context, - nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match, - local_context->getFileProgressCallback()); + auto iterator = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, /* distributed_processing */false, local_context, + predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 
737f733615f..8d5df96ca6e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -407,18 +407,9 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } else { - const auto object_key = configuration_->getPath(); - auto object_metadata = object_storage->getObjectMetadata(object_key); - auto object_info = std::make_shared(object_key, object_metadata); - - object_infos.emplace_back(object_info); - if (read_keys) - read_keys->emplace_back(object_info); - - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - - is_finished = true; + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Using glob iterator with path without globs is not allowed (used path: {})", + configuration->getPath()); } } From 3eeb43297d0b884a61c25ecc313397c016bda914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 15 May 2024 13:26:43 +0200 Subject: [PATCH 229/651] Fix clang tidy --- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 219dec28af5..c7baea7cc92 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -368,7 +368,7 @@ public: int32_t metadata_version; Index getIndex() const; - void setIndex(const Columns & index_); + void setIndex(const Columns & cols_); void setIndex(Columns && index_); void unloadIndex(); From 73e11cda7bfaffb7eea5d08b6964d971332f7cce Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 15 May 2024 13:28:40 +0200 Subject: [PATCH 230/651] Move to common --- src/Common/CopyableAtomic.h | 39 +++++++++++++++++++ .../MergeTree/ReplicatedMergeTreeLogEntry.h | 32 +-------------- 2 files changed, 41 insertions(+), 30 deletions(-) create mode 100644 src/Common/CopyableAtomic.h diff --git a/src/Common/CopyableAtomic.h b/src/Common/CopyableAtomic.h new file mode 100644 index 00000000000..227fffe927f --- /dev/null +++ b/src/Common/CopyableAtomic.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +namespace DB +{ + +template +struct CopyableAtomic +{ + CopyableAtomic(const CopyableAtomic & other) + : value(other.value.load()) + {} + + explicit CopyableAtomic(T && value_) + : value(std::forward(value_)) + {} + + CopyableAtomic & operator=(const CopyableAtomic & other) + { + value = other.value.load(); + return *this; + } + + CopyableAtomic & operator=(bool value_) + { + value = value_; + return *this; + } + + explicit operator T() const { return value; } + + const T & getValue() const { return value; } + + std::atomic value; +}; + +} diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 7011794e16d..7ff37c609eb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -173,36 +174,7 @@ struct ReplicatedMergeTreeLogEntryData size_t quorum = 0; /// Used only in tests for permanent fault injection for particular queue entry. 
- struct CopyableAtomicFlag - { - CopyableAtomicFlag() = default; - - CopyableAtomicFlag(const CopyableAtomicFlag & other) - : value(other.value.load()) - {} - - explicit CopyableAtomicFlag(bool value_) - : value(value_) - {} - - CopyableAtomicFlag & operator=(const CopyableAtomicFlag & other) - { - value = other.value.load(); - return *this; - } - - CopyableAtomicFlag & operator=(bool value_) - { - value = value_; - return *this; - } - - explicit operator bool() const { return value; } - - std::atomic value = false; - }; - - CopyableAtomicFlag fault_injected; + CopyableAtomic fault_injected{false}; /// If this MUTATE_PART entry caused by alter(modify/drop) query. bool isAlterMutation() const From c860e193800395478ddeeb77ea8ea44efa5d3e3e Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 15 May 2024 13:41:34 +0200 Subject: [PATCH 231/651] clickhouse disks reads server configs --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 98d6031edca..400afadb9c9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4467,7 +4467,7 @@ void Context::setApplicationType(ApplicationType type) /// Lock isn't required, you should set it at start shared->application_type = type; - if (type == ApplicationType::LOCAL || type == ApplicationType::SERVER) + if (type == ApplicationType::LOCAL || type == ApplicationType::SERVER || type == ApplicationType::DISKS) shared->server_settings.loadSettingsFromConfig(Poco::Util::Application::instance().config()); if (type == ApplicationType::SERVER) From a09bb5f0b7e2134ec576c3f20b492515cf258432 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 15 May 2024 11:42:11 +0000 Subject: [PATCH 232/651] Fix tests --- .../SerializationDynamicElement.cpp | 2 +- ...3039_dynamic_all_merge_algorithms_1.reference | 16 ++++++++-------- .../03039_dynamic_all_merge_algorithms_1.sh | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index b0a4e63d0a5..dafd6d663b0 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -72,7 +72,7 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( auto dynamic_element_state = std::make_shared(); dynamic_element_state->structure_state = std::move(structure_state); - const auto & variant_type = checkAndGetState(structure_state)->variant_type; + const auto & variant_type = checkAndGetState(dynamic_element_state->structure_state)->variant_type; /// Check if we actually have required element in the Variant. 
if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) { diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference index 4b4a1e2ab51..6c69b81c183 100644 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference @@ -10,16 +10,16 @@ SummingMergeTree 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 AggregatingMergeTree 100000 String 100000 UInt64 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 MergeTree wide + horizontal merge ReplacingMergeTree 100000 String @@ -32,16 +32,16 @@ SummingMergeTree 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 AggregatingMergeTree 100000 String 100000 UInt64 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 MergeTree compact + vertical merge ReplacingMergeTree 100000 String @@ -54,16 +54,16 @@ SummingMergeTree 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 AggregatingMergeTree 100000 String 100000 UInt64 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 MergeTree wide + vertical merge ReplacingMergeTree 100000 String @@ -76,13 +76,13 @@ SummingMergeTree 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 AggregatingMergeTree 100000 String 100000 UInt64 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh index 9298fe28fec..198c6ca93ff 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -30,10 +30,10 @@ function test() $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from test group by sum" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" $CH_CLIENT -nm -q "system start merges test; optimize table test final" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from test group by sum" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" $CH_CLIENT -q "drop table test" echo "AggregatingMergeTree" @@ -43,10 +43,10 @@ function test() $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" $CH_CLIENT -nm -q "system start merges test; optimize table test final" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test 
group by id, _part) group by sum order by sum, count()"
    $CH_CLIENT -q "drop table test"
}

From 3f789bbfa371a6796ce816b9c34fd05b2cada38a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Wed, 15 May 2024 13:53:02 +0200
Subject: [PATCH 233/651] Review comments

---
 src/Common/ThreadFuzzer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp
index 1a2d3b6ff48..b3e92210ad8 100644
--- a/src/Common/ThreadFuzzer.cpp
+++ b/src/Common/ThreadFuzzer.cpp
@@ -153,7 +153,7 @@ bool ThreadFuzzer::isEffective() const
             return true; \
         if (NAME##_before_sleep_probability.load(std::memory_order_relaxed) > 0.0) \
             return true; \
-        if (NAME##_before_sleep_time_us_max.load(std::memory_order_relaxed) > 0.001) \
+        if (NAME##_before_sleep_time_us_max.load(std::memory_order_relaxed) > 0.0) \
             return true; \
 \
         if (NAME##_after_yield_probability.load(std::memory_order_relaxed) > 0.0) \
@@ -162,7 +162,7 @@ bool ThreadFuzzer::isEffective() const
             return true; \
         if (NAME##_after_sleep_probability.load(std::memory_order_relaxed) > 0.0) \
             return true; \
-        if (NAME##_after_sleep_time_us_max.load(std::memory_order_relaxed) > 0.001) \
+        if (NAME##_after_sleep_time_us_max.load(std::memory_order_relaxed) > 0.0) \
             return true;

     FOR_EACH_WRAPPED_FUNCTION(CHECK_WRAPPER_PARAMS)

@@ -170,7 +170,7 @@ bool ThreadFuzzer::isEffective() const
 # undef INIT_WRAPPER_PARAMS
 #endif

-    if (explicit_sleep_probability > 0 && sleep_time_us_max > 0.001)
+    if (explicit_sleep_probability > 0 && sleep_time_us_max > 0)
         return true;

     if (explicit_memory_exception_probability > 0)

From 12e512c70ddfe32f81f78ee7d58ae47c38d34ee9 Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Wed, 15 May 2024 14:15:45 +0200
Subject: [PATCH 234/651] Delete
 tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference

Removed misspelled file
---
 .../0.2973_parse_crlf_with_tsv_files.reference | 12 ------------
 1 file changed, 12 deletions(-)
 delete mode 100644 tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference

diff --git a/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference b/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference
deleted file mode 100644
index 14cf3a564e4..00000000000
--- a/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference
+++ /dev/null
@@ -1,12 +0,0 @@
-/home/shaun/Desktop/ClickHouse/user_files/02973_parse_crlf_with_tsv_files_test_data_without_crlf.tsv
-<-- Read UNIX endings -->
-
-Akiba_Hebrew_Academy 2017-08-01 241
-Aegithina_tiphia 2018-02-01 34
-1971-72_Utah_Stars_season 2016-10-01 1
-
-<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->
-
-Akiba_Hebrew_Academy 2017-08-01 241
-Aegithina_tiphia 2018-02-01 34
-1971-72_Utah_Stars_season 2016-10-01 1

From 6b6374feb86948c3bbf4ae293233d60b0b733cdc Mon Sep 17 00:00:00 2001
From: Alexander Gololobov
Date: Wed, 15 May 2024 14:38:10 +0200
Subject: [PATCH 235/651] fix typo

---
 src/Core/Settings.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 4a0de354a03..05a49029ff8 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -799,8 +799,8 @@ class IColumn;
     M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \
     M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching
on write operations (INSERT, merges)", 0) \ M(UInt64, filesystem_cache_segments_batch_size, 20, "Limit on size of a single batch of file segments that a read buffer can request from cache. Too low value will lead to excessive requests to cache, too large may slow down eviction from cache", 0) \ - M(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, "Wait time to lock cache for sapce reservation in filesystem cache", 0) \ - M(UInt64, temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds, (10 * 60 * 1000), "Wait time to lock cache for sapce reservation for temporary data in filesystem cache", 0) \ + M(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, "Wait time to lock cache for space reservation in filesystem cache", 0) \ + M(UInt64, temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds, (10 * 60 * 1000), "Wait time to lock cache for space reservation for temporary data in filesystem cache", 0) \ \ M(Bool, use_page_cache_for_disks_without_file_cache, false, "Use userspace page cache for remote disks that don't have filesystem cache enabled.", 0) \ M(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, "Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache.", 0) \ From 53f5b958036d4ef3f69c3a22be96cf4c2e1b8c4a Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 May 2024 13:25:44 +0200 Subject: [PATCH 236/651] Fix typo --- docs/en/operations/settings/settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 131948eace9..1772a3aa861 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3719,7 +3719,7 @@ Default value: `0`. ## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist} -Ignore ansense of file if it does not exist when reading certain keys. +Ignore absence of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. @@ -3739,7 +3739,7 @@ Default value: `0`. ## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist} -Ignore ansense of file if it does not exist when reading certain keys. +Ignore absence of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. From ae10e7ded1080d5bd72372dc611cdcb7b96137ef Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 15 May 2024 13:09:00 +0000 Subject: [PATCH 237/651] Remove data from all disks after DROP with Lazy database. 
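
With the Lazy database engine the table object may already be unloaded
when DROP TABLE runs. In that case drop() used to be skipped and only
the metadata file was removed, which left orphaned data on non-default
disks. Now the data directory is removed from every writable disk that
still contains it, whether or not the StoragePtr is loaded.

A sketch of the scenario the new integration test covers (the 's3'
policy comes from the test config, and Lazy(2) unloads the table after
two seconds of inactivity):

- CREATE DATABASE lazy ENGINE = Lazy(2)
- CREATE TABLE lazy.table (id UInt64) ENGINE = Log SETTINGS disk = 's3'
- INSERT INTO lazy.table SELECT number FROM numbers(10)
- wait past the expiration so the table is unloaded, then run
  DROP TABLE lazy.table SYNC and check that no S3 objects remain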
--- src/Databases/DatabaseOnDisk.cpp | 28 +++--- .../test_lazy_database/__init__.py | 0 .../configs/storage_policy.xml | 12 +++ tests/integration/test_lazy_database/test.py | 88 +++++++++++++++++++ 4 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_lazy_database/__init__.py create mode 100644 tests/integration/test_lazy_database/configs/storage_policy.xml create mode 100644 tests/integration/test_lazy_database/test.py diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 67b45c7d08d..72a9ba318b1 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -324,31 +325,36 @@ void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_na StoragePtr table = detachTable(local_context, table_name); - /// This is possible for Lazy database. - if (!table) - return; - bool renamed = false; try { fs::rename(table_metadata_path, table_metadata_path_drop); renamed = true; - table->drop(); - table->is_dropped = true; - - fs::path table_data_dir(local_context->getPath() + table_data_path_relative); - if (fs::exists(table_data_dir)) - (void)fs::remove_all(table_data_dir); + // The table might be not loaded for Lazy database engine. + if (table) + { + table->drop(); + table->is_dropped = true; + } } catch (...) { LOG_WARNING(log, getCurrentExceptionMessageAndPattern(/* with_stacktrace */ true)); - attachTable(local_context, table_name, table, table_data_path_relative); + if (table) + attachTable(local_context, table_name, table, table_data_path_relative); if (renamed) fs::rename(table_metadata_path_drop, table_metadata_path); throw; } + for (const auto & [disk_name, disk] : getContext()->getDisksMap()) + { + if (disk->isReadOnly() || !disk->exists(table_data_path_relative)) + continue; + + LOG_INFO(log, "Removing data directory from disk {} with path {} for dropped table {} ", disk_name, table_data_path_relative, table_name); + disk->removeRecursive(table_data_path_relative); + } (void)fs::remove(table_metadata_path_drop); } diff --git a/tests/integration/test_lazy_database/__init__.py b/tests/integration/test_lazy_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_lazy_database/configs/storage_policy.xml b/tests/integration/test_lazy_database/configs/storage_policy.xml new file mode 100644 index 00000000000..58771d6b284 --- /dev/null +++ b/tests/integration/test_lazy_database/configs/storage_policy.xml @@ -0,0 +1,12 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + + diff --git a/tests/integration/test_lazy_database/test.py b/tests/integration/test_lazy_database/test.py new file mode 100644 index 00000000000..6890aa87374 --- /dev/null +++ b/tests/integration/test_lazy_database/test.py @@ -0,0 +1,88 @@ +import logging +import time +import pytest +import os +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["configs/storage_policy.xml"], + with_minio=True, + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def assert_objects_count(cluster, objects_count, path="data/"): + minio = cluster.minio_client + s3_objects = list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) + if 
objects_count != len(s3_objects): + for s3_object in s3_objects: + object_meta = minio.stat_object(cluster.minio_bucket, s3_object.object_name) + logging.info("Existing S3 object: %s", str(object_meta)) + assert objects_count == len(s3_objects) + + +def list_of_files_on_ch_disk(node, disk, path): + disk_path = node.query( + f"SELECT path FROM system.disks WHERE name='{disk}'" + ).splitlines()[0] + return node.exec_in_container( + ["bash", "-c", f"ls {os.path.join(disk_path, path)}"], user="root" + ) + + +@pytest.mark.parametrize( + "engine", + [ + pytest.param("Log"), + ], +) +@pytest.mark.parametrize( + "disk,check_s3", + [ + pytest.param("default", False), + pytest.param("s3", True), + ], +) +@pytest.mark.parametrize( + "delay", + [ + pytest.param(0), + pytest.param(4), + ], +) +def test_drop_table(cluster, engine, disk, check_s3, delay): + node = cluster.instances["node"] + + node.query("DROP DATABASE IF EXISTS lazy") + node.query("CREATE DATABASE lazy ENGINE=Lazy(2)") + node.query( + "CREATE TABLE lazy.table (id UInt64) ENGINE={} SETTINGS disk = '{}'".format( + engine, + disk, + ) + ) + + node.query("INSERT INTO lazy.table SELECT number FROM numbers(10)") + assert node.query("SELECT count(*) FROM lazy.table") == "10\n" + if delay: + time.sleep(delay) + node.query("DROP TABLE lazy.table SYNC") + + if check_s3: + # There mustn't be any orphaned data + assert_objects_count(cluster, 0) + + # Local data must be removed + assert list_of_files_on_ch_disk(node, disk, "data/lazy/") == "" From 36974a5423d5b6f9f31346f916c1df70e0397858 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 15 May 2024 15:41:43 +0200 Subject: [PATCH 238/651] address comments --- src/Core/ServerSettings.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 8 ++------ src/Storages/StorageReplicatedMergeTree.cpp | 9 ++++++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 04696a94fc2..81a48077a53 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -25,7 +25,7 @@ namespace DB M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \ M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \ - M(UInt64, max_unexpected_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \ + M(UInt64, max_unexpected_parts_loading_thread_pool_size, 8, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \ M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \ M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \ M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. 
Zero means unlimited.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4c953cd6537..a0888cca5a6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1921,13 +1921,13 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional(info, name, disk), uncovered, false, nullptr}); + unexpected_unloaded_data_parts.push_back({std::make_shared(info, name, disk), uncovered, /*is_broken*/ false, /*part*/ nullptr}); } if (!unexpected_unloaded_data_parts.empty()) @@ -2005,10 +2005,6 @@ void MergeTreeData::loadUnexpectedDataParts() { load_state.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes } - else - { - load_state.part->renameToDetached("ignored"); /// detached parts must not have '_' in prefixes - } }, Priority{}); } runner.waitForAllToFinishAndRethrowFirstError(); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9a5402f096a..dc23f080013 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1578,7 +1578,6 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) /// Intersection of local parts and expected parts ActiveDataPartSet local_expected_parts_set(format_version); - /// Collect unexpected parts for (const auto & part : parts) { local_expected_parts_set.add(part->name); @@ -1672,8 +1671,6 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) } } - unexpected_data_parts.clear(); - const UInt64 parts_to_fetch_blocks = std::accumulate(parts_to_fetch.cbegin(), parts_to_fetch.cend(), 0, [&](UInt64 acc, const String & part_name) { @@ -1737,6 +1734,12 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) /// Add to the queue jobs to pick up the missing parts from other replicas and remove from ZK the information that we have them. queue.setBrokenPartsToEnqueueFetchesOnLoading(std::move(parts_to_fetch)); + /// detached all unexpected data parts after sanity check. + for (auto & part_state : unexpected_data_parts) + { + part_state.part->renameToDetached("ignored"); + } + unexpected_data_parts.clear(); return true; } From 4483da06e819e902c273d6e84d3d58971d73c30c Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 15:42:40 +0200 Subject: [PATCH 239/651] Update rowNumberInXYZ functions --- .../functions/other-functions.md | 105 +++++++++++++++++- 1 file changed, 99 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 2b0215115cb..7c930d23919 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -998,17 +998,110 @@ SELECT version() Returns the build ID generated by a compiler for the running ClickHouse server binary. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. -## blockNumber() +## blockNumber -Returns the sequence number of the data block where the row is located. +Returns the sequence number of the [data block](https://clickhouse.com/docs/en/integrations/python#data-blocks) where the row is located. 
-## rowNumberInBlock() {#rowNumberInBlock} +**Syntax** + +```sql +blockNumber() +``` + +**Returned value** + +- Sequence number of the data block where the row is located. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT groupUniqArray(blockNumber()) +FROM +( + SELECT * + FROM system.numbers_mt + LIMIT 100000 +) +``` + +Result: + +```response +┌─groupUniqArray(blockNumber())─┐ +│ [6,7] │ +└───────────────────────────────┘ +``` + +## rowNumberInBlock {#rowNumberInBlock} + +Returns the ordinal number of the row in the [data block](https://clickhouse.com/docs/en/integrations/python#data-blocks). Different data blocks are always recalculated. + +**Syntax** + +```sql +rowNumberInBlock() +``` + +**Returned value** + +- Ordinal number of the row in the data block starting from 0. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + min(rowNumberInBlock()), + max(rowNumberInBlock()) +FROM (SELECT * FROM system.numbers_mt LIMIT 100000); +``` + +Result: + +```response +┌─min(rowNumberInBlock())─┬─max(rowNumberInBlock())─┐ +│ 0 │ 65408 │ +└─────────────────────────┴─────────────────────────┘ +``` + +## rowNumberInAllBlocks + +Returns the ordinal number of the row in the [data block](https://clickhouse.com/docs/en/integrations/python#data-blocks). This function only considers the affected data blocks. + +**Syntax** + +```sql +rowNumberInAllBlocks() +``` + +**Returned value** + +- Ordinal number of the row in the data block starting from 0. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + min(rowNumberInAllBlocks()), + max(rowNumberInAllBlocks()) +FROM (SELECT * FROM system.numbers_mt LIMIT 100000); +``` + +Result: + +```response +┌─min(rowNumberInAllBlocks())─┬─max(rowNumberInAllBlocks())─┐ +│ 0 │ 99999 │ +└─────────────────────────────┴─────────────────────────────┘ +``` -Returns the ordinal number of the row in the data block. Different data blocks are always recalculated. -## rowNumberInAllBlocks() -Returns the ordinal number of the row in the data block. This function only considers the affected data blocks. ## neighbor From c93f363be8748070a54aa66e75760e80ddb741ed Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 15 May 2024 13:56:56 +0000 Subject: [PATCH 240/651] Another attempt. 
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 43 ++++++++++++++++++----- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index d6e1ceb243a..b176f2cd1b5 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -769,6 +769,7 @@ struct IdentifierResolveScope /// Nodes with duplicated aliases std::unordered_set nodes_with_duplicated_aliases; + std::vector cloned_nodes_with_duplicated_aliases; /// Current scope expression in resolve process stack ExpressionsStack expressions_in_resolve_process_stack; @@ -1045,22 +1046,41 @@ private: if (is_lambda_node) { if (scope.alias_name_to_expression_node->contains(alias)) - scope.nodes_with_duplicated_aliases.insert(node->clone()); + { + scope.nodes_with_duplicated_aliases.emplace(node); + auto cloned_node = node->clone(); + scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); + scope.nodes_with_duplicated_aliases.emplace(cloned_node); + } auto [_, inserted] = scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); if (!inserted) - scope.nodes_with_duplicated_aliases.insert(node->clone()); + { + scope.nodes_with_duplicated_aliases.emplace(node); + auto cloned_node = node->clone(); + scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); + scope.nodes_with_duplicated_aliases.emplace(cloned_node); + } return; } if (scope.alias_name_to_lambda_node.contains(alias)) - scope.nodes_with_duplicated_aliases.insert(node->clone()); + { + scope.nodes_with_duplicated_aliases.emplace(node); + auto cloned_node = node->clone(); + scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); + scope.nodes_with_duplicated_aliases.emplace(cloned_node); + } auto [_, inserted] = scope.alias_name_to_expression_node->insert(std::make_pair(alias, node)); if (!inserted) - scope.nodes_with_duplicated_aliases.insert(node->clone()); - + { + scope.nodes_with_duplicated_aliases.emplace(node); + auto cloned_node = node->clone(); + scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); + scope.nodes_with_duplicated_aliases.emplace(cloned_node); + } /// If node is identifier put it also in scope alias name to lambda node map if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER) scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); @@ -6254,6 +6274,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(node_alias); } + bool is_duplicated_alias = scope.nodes_with_duplicated_aliases.contains(node); + if (is_duplicated_alias) + scope.non_cached_identifier_lookups_during_expression_resolve.insert({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); + /** Do not use alias table if node has alias same as some other node. * Example: WITH x -> x + 1 AS lambda SELECT 1 AS lambda; * During 1 AS lambda resolve if we use alias table we replace node with x -> x + 1 AS lambda. @@ -6264,7 +6288,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * alias table because in alias table subquery could be evaluated as scalar. 
*/ bool use_alias_table = true; - if (scope.nodes_with_duplicated_aliases.contains(node) || (allow_table_expression && isSubqueryNodeType(node->getNodeType()))) + if (is_duplicated_alias || (allow_table_expression && isSubqueryNodeType(node->getNodeType()))) use_alias_table = false; if (!node_alias.empty() && use_alias_table) @@ -6568,6 +6592,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id } } + if (is_duplicated_alias) + scope.non_cached_identifier_lookups_during_expression_resolve.erase({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); + resolved_expressions.emplace(node, result_projection_names); scope.popExpressionNode(); @@ -6600,7 +6627,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node { auto node_to_resolve = node; auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression); - size_t expected_projection_names_size = 1; if (auto * expression_list = node_to_resolve->as()) { @@ -8208,14 +8234,13 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier * After scope nodes are resolved, we can compare node with duplicate alias with * node from scope alias table. */ - for (const auto & node_with_duplicated_alias : scope.nodes_with_duplicated_aliases) + for (const auto & node_with_duplicated_alias : scope.cloned_nodes_with_duplicated_aliases) { auto node = node_with_duplicated_alias; auto node_alias = node->getAlias(); /// Add current alias to non cached set, because in case of cyclic alias identifier should not be substituted from cache. /// See 02896_cyclic_aliases_crash. - scope.non_cached_identifier_lookups_during_expression_resolve.insert({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); resolveExpressionNode(node, scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/); bool has_node_in_alias_table = false; From 30bee3324cc360b6ac81aed834cad9d1cb7cd666 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 15 May 2024 15:15:14 +0100 Subject: [PATCH 241/651] impl --- src/Planner/findParallelReplicasQuery.cpp | 22 ++++++++++--------- ...3_parallel_replicas_mat_view_bug.reference | 2 ++ .../03143_parallel_replicas_mat_view_bug.sql | 13 +++++++++++ 3 files changed, 27 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference create mode 100644 tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index ef640bcd42d..17a2f09c4f2 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -1,24 +1,25 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include #include #include #include +#include +#include +#include #include #include +#include +#include +#include +#include #include #include +#include #include #include +#include +#include namespace DB { @@ -316,7 +317,8 @@ static const TableNode * findTableForParallelReplicas(const IQueryTreeNode * que case QueryTreeNodeType::TABLE: { const auto & table_node = query_tree_node->as(); - const auto & storage = table_node.getStorage(); + const auto * as_mat_view = typeid_cast(table_node.getStorage().get()); + const auto & storage = as_mat_view ? 
as_mat_view->getTargetTable() : table_node.getStorage(); if (std::dynamic_pointer_cast(storage) || typeid_cast(storage.get())) return &table_node; diff --git a/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference new file mode 100644 index 00000000000..dec2cbe1fa3 --- /dev/null +++ b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference @@ -0,0 +1,2 @@ +test +test diff --git a/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql new file mode 100644 index 00000000000..d6a0d66083d --- /dev/null +++ b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS mv_table; +DROP TABLE IF EXISTS null_table; + +SET cluster_for_parallel_replicas='parallel_replicas', max_parallel_replicas=4, allow_experimental_parallel_reading_from_replicas=1; +SET allow_experimental_analyzer=1; + +CREATE TABLE null_table (str String) ENGINE = Null; +CREATE MATERIALIZED VIEW mv_table (str String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03143_parallel_replicas_mat_view_bug', '{replica}') ORDER BY str AS SELECT str AS str FROM null_table; +INSERT INTO null_table VALUES ('test'); + +SELECT * FROM mv_table; + +SELECT * FROM merge('xxx', '^.inner_id.*'); From 1be205e3f4528b6d09775dee5560b26ee99db67b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 16:22:25 +0200 Subject: [PATCH 242/651] Update anova documentation --- .../reference/analysis_of_variance.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md index 7f0df74010b..d9b44b3ff07 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md +++ b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md @@ -21,24 +21,25 @@ Aliases: `anova` :::note Groups are enumerated starting from 0 and there should be at least two groups to perform a test. -Moreover there should be at least one group with the number of observations greater than one. +There should be at least one group with the number of observations greater than one. ::: **Returned value** -- `(f_statistic, p_value)`. [Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)) - -**Implementation details** - +- `(f_statistic, p_value)`. [Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). 
**Example** Query: ```sql +SELECT analysisOfVariance(number, number % 2) FROM numbers(1048575); ``` Result: ```response +┌─analysisOfVariance(number, modulo(number, 2))─┐ +│ (0,1) │ +└───────────────────────────────────────────────┘ ``` From 47dfeaa487743d81c66bb280e8eeb8f31ef21507 Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 12 May 2024 21:57:37 +0800 Subject: [PATCH 243/651] fix comments Change-Id: I2677dc20fc515bbbe91f54154fc4c081f164758e --- .../Formats/Impl/Parquet/ParquetDataBuffer.h | 9 +- .../Impl/Parquet/ParquetDataValuesReader.cpp | 18 +- .../Impl/Parquet/ParquetDataValuesReader.h | 13 +- .../Impl/Parquet/ParquetLeafColReader.cpp | 33 +- .../Impl/Parquet/ParquetRecordReader.cpp | 326 +++++++++++++----- .../Impl/Parquet/ParquetRecordReader.h | 6 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 + .../02998_native_parquet_reader.sh | 4 +- .../native_parquet_reader.parquet} | Bin 9 files changed, 296 insertions(+), 115 deletions(-) rename tests/queries/0_stateless/{02998_native_parquet_reader.parquet => data_parquet/native_parquet_reader.parquet} (100%) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h index 5c37375fa0c..57df6f59f72 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -48,7 +48,7 @@ public: consume(bytes); } - void ALWAYS_INLINE readDateTime64(DateTime64 & dst) + void ALWAYS_INLINE readDateTime64FromInt96(DateTime64 & dst) { static const int max_scale_num = 9; static const UInt64 pow10[max_scale_num + 1] @@ -110,10 +110,7 @@ public: // refer to: RawBytesToDecimalBytes in reader_internal.cc, Decimal128::FromBigEndian in decimal.cc auto status = TArrowDecimal::FromBigEndian(getArrowData(), elem_bytes_num); - if (unlikely(!status.ok())) - { - throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Read parquet decimal failed: {}", status.status().ToString()); - } + assert(status.ok()); status.ValueUnsafe().ToBytes(reinterpret_cast(out)); consume(elem_bytes_num); } @@ -144,7 +141,7 @@ private: class LazyNullMap { public: - LazyNullMap(UInt64 size_) : size(size_), col_nullable(nullptr) {} + explicit LazyNullMap(UInt64 size_) : size(size_), col_nullable(nullptr) {} template requires std::is_integral_v diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 6743086e9e6..1f0c7105572 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -276,8 +276,7 @@ void ParquetPlainValuesReader::readBatch( auto idx = cursor; cursor += count; - // the type of offset_data is PaddedPODArray, which makes sure that the -1 index is available - for (auto val_offset = offset_data[idx - 1]; idx < cursor; idx++) + for (auto val_offset = chars_size_bak; idx < cursor; idx++) { offset_data[idx] = ++val_offset; } @@ -288,7 +287,7 @@ void ParquetPlainValuesReader::readBatch( template <> -void ParquetPlainValuesReader>::readBatch( +void ParquetPlainValuesReader, ParquetReaderTypes::TimestampInt96>::readBatch( MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) { auto cursor = col_ptr->size(); @@ -302,21 +301,21 @@ void ParquetPlainValuesReader>::readBatch( null_map, /* individual_visitor */ [&](size_t nest_cursor) { - plain_data_buffer.readDateTime64(column_data[nest_cursor]); + 
plain_data_buffer.readDateTime64FromInt96(column_data[nest_cursor]); }, /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) { auto * col_data_pos = column_data + nest_cursor; for (UInt32 i = 0; i < count; i++) { - plain_data_buffer.readDateTime64(col_data_pos[i]); + plain_data_buffer.readDateTime64FromInt96(col_data_pos[i]); } } ); } -template -void ParquetPlainValuesReader::readBatch( +template +void ParquetPlainValuesReader::readBatch( MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) { auto cursor = col_ptr->size(); @@ -542,11 +541,14 @@ void ParquetRleDictReader::readBatch( template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; template class ParquetPlainValuesReader; template class ParquetPlainValuesReader; template class ParquetPlainValuesReader>; template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader>; template class ParquetPlainValuesReader; template class ParquetFixedLenPlainReader>; @@ -557,7 +559,9 @@ template class ParquetRleLCReader; template class ParquetRleLCReader; template class ParquetRleDictReader; +template class ParquetRleDictReader; template class ParquetRleDictReader; +template class ParquetRleDictReader; template class ParquetRleDictReader; template class ParquetRleDictReader; template class ParquetRleDictReader>; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 688de4f52eb..0f916ff862d 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -24,7 +24,7 @@ public: /** * @brief Used when the bit_width is 0, so all elements have same value. */ - RleValuesReader(UInt32 total_size, Int32 val = 0) + explicit RleValuesReader(UInt32 total_size, Int32 val = 0) : bit_reader(nullptr), bit_width(0), cur_group_size(total_size), cur_value(val), cur_group_is_packed(false) {} @@ -72,7 +72,8 @@ public: * @tparam SteppedValidVisitor A callback with signature: * void(size_t cursor, const std::vector & valid_index_steps) * for n valid elements with null value interleaved in a BitPacked group, - * i-th item in valid_index_steps describes how many elements in column there are after (i-1)-th valid element. + * i-th item in valid_index_steps describes how many elements there are + * from i-th valid element (include) to (i+1)-th valid element (exclude). 
* * take following BitPacked group with 2 valid elements for example: * null valid null null valid null @@ -138,10 +139,16 @@ public: using ParquetDataValuesReaderPtr = std::unique_ptr; +enum class ParquetReaderTypes +{ + Normal, + TimestampInt96, +}; + /** * The definition level is RLE or BitPacked encoding, while data is read directly */ -template +template class ParquetPlainValuesReader : public ParquetDataValuesReader { public: diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp index 52dfad7606a..9e1cae9bb65 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -110,16 +110,24 @@ ColumnPtr readDictPage( template <> ColumnPtr readDictPage>( const parquet::DictionaryPage & page, - const parquet::ColumnDescriptor & /* col_des */, + const parquet::ColumnDescriptor & col_des, const DataTypePtr & data_type) { + const auto & datetime_type = assert_cast(*data_type); auto dict_col = ColumnDecimal::create(page.num_values(), datetime_type.getScale()); auto * col_data = dict_col->getData().data(); ParquetDataBuffer buffer(page.data(), page.size(), datetime_type.getScale()); - for (auto i = 0; i < page.num_values(); i++) + if (col_des.physical_type() == parquet::Type::INT64) { - buffer.readDateTime64(col_data[i]); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(Int64)); + } + else + { + for (auto i = 0; i < page.num_values(); i++) + { + buffer.readDateTime64FromInt96(col_data[i]); + } } return dict_col; } @@ -190,8 +198,12 @@ std::unique_ptr createPlainReader( RleValuesReaderPtr def_level_reader, ParquetDataBuffer buffer) { - return std::make_unique>( - col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); + if (std::is_same_v> && col_des.physical_type() == parquet::Type::INT96) + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); + else + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); } @@ -287,6 +299,7 @@ void ParquetLeafColReader::degradeDictionary() null_map = std::make_unique(reading_rows_num); auto col_existing = std::move(column); column = ColumnString::create(); + reserveColumnStrRows(column, reading_rows_num); ColumnString & col_dest = *static_cast(column.get()); const ColumnString & col_dict_str = *static_cast(dictionary.get()); @@ -294,8 +307,9 @@ void ParquetLeafColReader::degradeDictionary() visitColStrIndexType(dictionary->size(), [&](TColVec *) { const TColVec & col_src = *static_cast(col_existing.get()); - reserveColumnStrRows(column, reading_rows_num); + // It will be easier to create a ColumnLowCardinality and call convertToFullColumn() on it, + // while the performance loss is ignorable, the implementation can be updated next time. 
col_dest.getOffsets().resize(col_src.size()); for (size_t i = 0; i < col_src.size(); i++) { @@ -378,6 +392,11 @@ void ParquetLeafColReader::readPage() LOG_DEBUG(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); dictionary = readDictPage(dict_page, col_descriptor, base_data_type); + if (unlikely(dictionary->size() < 2)) + { + // must not small than ColumnUnique::numSpecialValues() + dictionary->assumeMutable()->insertManyDefaults(2); + } if (std::is_same_v) { reading_low_cardinality = true; @@ -508,7 +527,9 @@ std::unique_ptr ParquetLeafColReader::createDi template class ParquetLeafColReader; +template class ParquetLeafColReader; template class ParquetLeafColReader; +template class ParquetLeafColReader; template class ParquetLeafColReader; template class ParquetLeafColReader; template class ParquetLeafColReader; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 9cde433b983..fddd8059925 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -36,8 +36,7 @@ namespace ErrorCodes try { (s); } \ catch (const ::parquet::ParquetException & e) \ { \ - auto msg = PreformattedMessage::create("Excepted when reading parquet: {}", e.what()); \ - throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); \ + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Parquet exception: {}", e.what()); \ } \ } while (false) @@ -45,102 +44,252 @@ namespace { std::unique_ptr createFileReader( - std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file) + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + std::shared_ptr metadata = nullptr) { std::unique_ptr res; - THROW_PARQUET_EXCEPTION(res = parquet::ParquetFileReader::Open(std::move(arrow_file))); + THROW_PARQUET_EXCEPTION(res = parquet::ParquetFileReader::Open( + std::move(arrow_file), + parquet::default_reader_properties(), + metadata)); return res; } -std::unique_ptr createColReader( - const parquet::ColumnDescriptor & col_descriptor, - DataTypePtr ch_type, - std::unique_ptr meta, - std::unique_ptr reader) +class ColReaderFactory { - if (col_descriptor.logical_type()->is_date() && parquet::Type::INT32 == col_descriptor.physical_type()) +public: + ColReaderFactory( + const parquet::ArrowReaderProperties & reader_properties_, + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr ch_type_, + std::unique_ptr meta_, + std::unique_ptr page_reader_) + : reader_properties(reader_properties_) + , col_descriptor(col_descriptor_) + , ch_type(std::move(ch_type_)) + , meta(std::move(meta_)) + , page_reader(std::move(page_reader_)) {} + + std::unique_ptr makeReader(); + +private: + const parquet::ArrowReaderProperties & reader_properties; + const parquet::ColumnDescriptor & col_descriptor; + DataTypePtr ch_type; + std::unique_ptr meta; + std::unique_ptr page_reader; + + + UInt32 getScaleFromLogicalTimestamp(parquet::LogicalType::TimeUnit::unit tm_unit); + UInt32 getScaleFromArrowTimeUnit(arrow::TimeUnit::type tm_unit); + + std::unique_ptr fromInt32(); + std::unique_ptr fromInt64(); + std::unique_ptr fromByteArray(); + std::unique_ptr fromFLBA(); + + std::unique_ptr fromInt32INT(const parquet::IntLogicalType & int_type); + std::unique_ptr fromInt64INT(const parquet::IntLogicalType & int_type); + + template + auto makeLeafReader() { - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), 
std::move(reader)); + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(page_reader)); } - else if (col_descriptor.logical_type()->is_decimal()) + + template + auto makeDecimalLeafReader() { - switch (col_descriptor.physical_type()) + auto data_type = std::make_shared>( + col_descriptor.type_precision(), col_descriptor.type_scale()); + return std::make_unique>>( + col_descriptor, std::move(data_type), std::move(meta), std::move(page_reader)); + } + + std::unique_ptr throwUnsupported(std::string msg = "") + { + throw Exception( + ErrorCodes::PARQUET_EXCEPTION, + "Unsupported logical type: {} and physical type: {} for field =={}=={}", + col_descriptor.logical_type()->ToString(), col_descriptor.physical_type(), col_descriptor.name(), msg); + } +}; + +UInt32 ColReaderFactory::getScaleFromLogicalTimestamp(parquet::LogicalType::TimeUnit::unit tm_unit) +{ + switch (tm_unit) + { + case parquet::LogicalType::TimeUnit::MILLIS: + return 3; + case parquet::LogicalType::TimeUnit::MICROS: + return 6; + case parquet::LogicalType::TimeUnit::NANOS: + return 9; + default: + throwUnsupported(PreformattedMessage::create(", invalid timestamp unit: {}", tm_unit)); + return 0; + } +} + +UInt32 ColReaderFactory::getScaleFromArrowTimeUnit(arrow::TimeUnit::type tm_unit) +{ + switch (tm_unit) + { + case arrow::TimeUnit::MILLI: + return 3; + case arrow::TimeUnit::MICRO: + return 6; + case arrow::TimeUnit::NANO: + return 9; + default: + throwUnsupported(PreformattedMessage::create(", invalid arrow time unit: {}", tm_unit)); + return 0; + } +} + +std::unique_ptr ColReaderFactory::fromInt32() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::INT: + return fromInt32INT(dynamic_cast(*col_descriptor.logical_type())); + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + case parquet::LogicalType::Type::DATE: + return makeLeafReader(); + case parquet::LogicalType::Type::DECIMAL: + return makeDecimalLeafReader(); + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromInt64() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::INT: + return fromInt64INT(dynamic_cast(*col_descriptor.logical_type())); + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + case parquet::LogicalType::Type::TIMESTAMP: { - case parquet::Type::INT32: - { - auto data_type = std::make_shared( - col_descriptor.type_precision(), col_descriptor.type_scale()); - return std::make_unique>>( - col_descriptor, data_type, std::move(meta), std::move(reader)); - } - case parquet::Type::INT64: - { - auto data_type = std::make_shared( - col_descriptor.type_precision(), col_descriptor.type_scale()); - return std::make_unique>>( - col_descriptor, data_type, std::move(meta), std::move(reader)); - } - case parquet::Type::FIXED_LEN_BYTE_ARRAY: - { - if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) - { - auto data_type = std::make_shared( - col_descriptor.type_precision(), col_descriptor.type_scale()); - return std::make_unique>>( - col_descriptor, data_type, std::move(meta), std::move(reader)); - } - else - { - auto data_type = std::make_shared( - col_descriptor.type_precision(), col_descriptor.type_scale()); - return std::make_unique>>( - col_descriptor, data_type, std::move(meta), std::move(reader)); - } - } - default: - throw Exception( - ErrorCodes::PARQUET_EXCEPTION, - "Type not supported for decimal: {}", - col_descriptor.physical_type()); + const auto & tm_type 
= dynamic_cast(*col_descriptor.logical_type()); + auto read_type = std::make_shared(getScaleFromLogicalTimestamp(tm_type.time_unit())); + return std::make_unique>>( + col_descriptor, std::move(read_type), std::move(meta), std::move(page_reader)); } + case parquet::LogicalType::Type::DECIMAL: + return makeDecimalLeafReader(); + default: + return throwUnsupported(); } - else +} + +std::unique_ptr ColReaderFactory::fromByteArray() +{ + switch (col_descriptor.logical_type()->type()) { - switch (col_descriptor.physical_type()) - { - case parquet::Type::INT32: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - case parquet::Type::INT64: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - case parquet::Type::FLOAT: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - case parquet::Type::INT96: - { - DataTypePtr read_type = ch_type; - if (!isDateTime64(ch_type)) - { - read_type = std::make_shared(ParquetRecordReader::default_datetime64_scale); - } - return std::make_unique>>( - col_descriptor, read_type, std::move(meta), std::move(reader)); - } - case parquet::Type::DOUBLE: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - case parquet::Type::BYTE_ARRAY: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - default: - throw Exception( - ErrorCodes::PARQUET_EXCEPTION, "Type not supported: {}", col_descriptor.physical_type()); - } + case parquet::LogicalType::Type::STRING: + return makeLeafReader(); + default: + return throwUnsupported(); } } +std::unique_ptr ColReaderFactory::fromFLBA() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::DECIMAL: + { + if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) + return makeDecimalLeafReader(); + else if (col_descriptor.type_length() <= static_cast(sizeof(Decimal256))) + return makeDecimalLeafReader(); + + return throwUnsupported(PreformattedMessage::create( + ", invalid type length: {}", col_descriptor.type_length())); + } + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromInt32INT(const parquet::IntLogicalType & int_type) +{ + switch (int_type.bit_width()) + { + case sizeof(Int32): + { + if (int_type.is_signed()) + return makeLeafReader(); + else + return makeLeafReader(); + } + default: + return throwUnsupported(PreformattedMessage::create(", bit width: {}", int_type.bit_width())); + } +} + +std::unique_ptr ColReaderFactory::fromInt64INT(const parquet::IntLogicalType & int_type) +{ + switch (int_type.bit_width()) + { + case sizeof(Int64): + { + if (int_type.is_signed()) + return makeLeafReader(); + else + return makeLeafReader(); + } + default: + return throwUnsupported(PreformattedMessage::create(", bit width: {}", int_type.bit_width())); + } +} + +// refer: GetArrowType method in schema_internal.cc of arrow +std::unique_ptr ColReaderFactory::makeReader() +{ + // this method should to be called only once for each instance + SCOPE_EXIT({ page_reader = nullptr; }); + assert(page_reader); + + switch (col_descriptor.physical_type()) + { + case parquet::Type::BOOLEAN: + break; + case parquet::Type::INT32: + return fromInt32(); + case parquet::Type::INT64: + return fromInt64(); + case parquet::Type::INT96: + { + DataTypePtr read_type = ch_type; + if (!isDateTime64(ch_type)) + { + auto scale = 
getScaleFromArrowTimeUnit(reader_properties.coerce_int96_timestamp_unit()); + read_type = std::make_shared(scale); + } + return std::make_unique>>( + col_descriptor, read_type, std::move(meta), std::move(page_reader)); + } + case parquet::Type::FLOAT: + return makeLeafReader(); + case parquet::Type::DOUBLE: + return makeLeafReader(); + case parquet::Type::BYTE_ARRAY: + return fromByteArray(); + case parquet::Type::FIXED_LEN_BYTE_ARRAY: + return fromFLBA(); + default: + break; + } + + return throwUnsupported(); +} + } // anonymous namespace ParquetRecordReader::ParquetRecordReader( @@ -148,8 +297,9 @@ ParquetRecordReader::ParquetRecordReader( parquet::ArrowReaderProperties reader_properties_, std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, const FormatSettings & format_settings, - std::vector row_groups_indices_) - : file_reader(createFileReader(std::move(arrow_file))) + std::vector row_groups_indices_, + std::shared_ptr metadata) + : file_reader(createFileReader(std::move(arrow_file), std::move(metadata))) , reader_properties(reader_properties_) , header(std::move(header_)) , max_block_size(format_settings.parquet.max_block_size) @@ -210,15 +360,17 @@ void ParquetRecordReader::loadNextRowGroup() column_readers.clear(); for (size_t i = 0; i < parquet_col_indice.size(); i++) { - column_readers.emplace_back(createColReader( + ColReaderFactory factory( + reader_properties, *file_reader->metadata()->schema()->Column(parquet_col_indice[i]), header.getByPosition(i).type, cur_row_group_reader->metadata()->ColumnChunk(parquet_col_indice[i]), - cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i]))); + cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i])); + column_readers.emplace_back(factory.makeReader()); } auto duration = watch.elapsedNanoseconds() / 1e6; - LOG_DEBUG(log, "reading row group {} consumed {} ms", row_groups_indices[next_row_group_idx], duration); + LOG_DEBUG(log, "begin to read row group {} consumed {} ms", row_groups_indices[next_row_group_idx], duration); ++next_row_group_idx; cur_row_group_left_rows = cur_row_group_reader->metadata()->num_rows(); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h index 4789be59ec8..2f728a586a0 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h @@ -22,13 +22,11 @@ public: parquet::ArrowReaderProperties reader_properties_, std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, const FormatSettings & format_settings, - std::vector row_groups_indices_); + std::vector row_groups_indices_, + std::shared_ptr metadata = nullptr); Chunk readChunk(); - // follow the scale generated by spark - static constexpr UInt8 default_datetime64_scale = 9; - private: std::unique_ptr file_reader; parquet::ArrowReaderProperties reader_properties; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 2e849f09fda..7fc7b9c3cab 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -3,6 +3,7 @@ #if USE_PARQUET +#include #include #include #include @@ -623,6 +624,7 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un return; } + // TODO support defaults_for_omitted_fields feature when supporting nested columns auto num_rows = chunk.getNumRows(); res = get_pending_chunk(num_rows, 
std::move(chunk)); } diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.sh b/tests/queries/0_stateless/02998_native_parquet_reader.sh index 4e5169c4bf0..d6369c4921b 100755 --- a/tests/queries/0_stateless/02998_native_parquet_reader.sh +++ b/tests/queries/0_stateless/02998_native_parquet_reader.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -PAR_PATH="$CURDIR"/02998_native_parquet_reader.parquet +PAR_PATH="$CURDIR"/data_parquet/native_parquet_reader.parquet # the content of parquet file can be generated by following codes # < Date: Wed, 15 May 2024 17:24:56 +0200 Subject: [PATCH 244/651] fix tests --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a0888cca5a6..e5590900f8f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1921,7 +1921,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional Date: Wed, 15 May 2024 17:28:47 +0200 Subject: [PATCH 245/651] Handle concurrent deletion on nodes --- programs/keeper-client/Commands.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 2ec43ae15d0..ff60e7af6a1 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -230,7 +230,8 @@ void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client auto path = client->getAbsolutePath(query->args[1].safeGet()); Coordination::Stat stat; - client->zookeeper->get(path, &stat); + if (!client->zookeeper->exists(path, &stat)) + return; /// It is ok if node was deleted meanwhile if (stat.numChildren >= static_cast(threshold)) { @@ -238,11 +239,17 @@ void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client return; } - auto children = client->zookeeper->getChildren(path); + Strings children; + auto status = client->zookeeper->tryGetChildren(path, children); + if (status == Coordination::Error::ZNONODE) + return; /// It is ok if node was deleted meanwhile + else if (status != Coordination::Error::ZOK) + throw DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, "Error {} while getting children of {}", status, path.string()); + std::sort(children.begin(), children.end()); + auto next_query = *query; for (const auto & child : children) { - auto next_query = *query; next_query.args[1] = DB::Field(path / child); execute(&next_query, client); } From a5a614bff78141c4be7c1cb057c7539699acdf7f Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 15 May 2024 17:30:17 +0200 Subject: [PATCH 246/651] Traverse supernode tree because it can have other supernodes inside --- programs/keeper-client/Commands.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index ff60e7af6a1..50f54d4dabf 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -234,10 +234,7 @@ void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client return; /// It is ok if node was deleted meanwhile if (stat.numChildren >= static_cast(threshold)) - { std::cout << static_cast(path) << "\t" << stat.numChildren << "\n"; - return; - } Strings children; auto status = client->zookeeper->tryGetChildren(path, 
children); From fc5961eaa7cb77e4be372b6b6875c97057e197d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 15 May 2024 17:31:41 +0200 Subject: [PATCH 247/651] Full tidy fix --- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c7baea7cc92..a184d00f143 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -369,7 +369,7 @@ public: Index getIndex() const; void setIndex(const Columns & cols_); - void setIndex(Columns && index_); + void setIndex(Columns && cols_); void unloadIndex(); /// For data in RAM ('index') From 4de7773783d8d31b36f40e2951358b67f30c946b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 15 May 2024 17:50:46 +0200 Subject: [PATCH 248/651] Properly calculate number of nodes in subtree --- programs/keeper-client/Commands.cpp | 41 ++++++++++++++++------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 50f54d4dabf..221da4fe976 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -314,31 +314,34 @@ bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr & return true; } +/// DFS the subtree and return the number of nodes in the subtree +static Int64 traverse(const fs::path & path, KeeperClient * client, std::vector> & result) +{ + Int64 nodes_in_subtree = 1; + + Strings children; + auto status = client->zookeeper->tryGetChildren(path, children); + if (status == Coordination::Error::ZNONODE) + return 0; + else if (status != Coordination::Error::ZOK) + throw DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, "Error {} while getting children of {}", status, path.string()); + + for (auto & child : children) + nodes_in_subtree += traverse(path / child, client, result); + + result.emplace_back(nodes_in_subtree, path.string()); + + return nodes_in_subtree; +} + void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const { auto path = client->getAbsolutePath(query->args[0].safeGet()); auto n = query->args[1].safeGet(); - std::vector> result; + std::vector> result; - std::queue queue; - queue.push(path); - while (!queue.empty()) - { - auto next_path = queue.front(); - queue.pop(); - - auto children = client->zookeeper->getChildren(next_path); - for (auto & child : children) - child = next_path / child; - auto response = client->zookeeper->get(children); - - for (size_t i = 0; i < response.size(); ++i) - { - result.emplace_back(response[i].stat.numChildren, children[i]); - queue.push(children[i]); - } - } + traverse(path, client, result); std::sort(result.begin(), result.end(), std::greater()); for (UInt64 i = 0; i < std::min(result.size(), static_cast(n)); ++i) From af84e3e1a9c8c3b62f738f231e457efc8ea301ea Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 15 May 2024 17:13:32 +0100 Subject: [PATCH 249/651] fix --- .../0_stateless/03143_parallel_replicas_mat_view_bug.reference | 1 - .../0_stateless/03143_parallel_replicas_mat_view_bug.sql | 2 -- 2 files changed, 3 deletions(-) diff --git a/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference index dec2cbe1fa3..9daeafb9864 100644 --- a/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference +++ 
b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.reference @@ -1,2 +1 @@ test -test diff --git a/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql index d6a0d66083d..97ed29802c7 100644 --- a/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql +++ b/tests/queries/0_stateless/03143_parallel_replicas_mat_view_bug.sql @@ -9,5 +9,3 @@ CREATE MATERIALIZED VIEW mv_table (str String) ENGINE = ReplicatedMergeTree('/cl INSERT INTO null_table VALUES ('test'); SELECT * FROM mv_table; - -SELECT * FROM merge('xxx', '^.inner_id.*'); From 2a87f01890b32c7126805d409e936981209f0641 Mon Sep 17 00:00:00 2001 From: Tomer Shafir <86107951+tomershafir@users.noreply.github.com> Date: Wed, 15 May 2024 19:15:53 +0300 Subject: [PATCH 250/651] fix io_uring.xml --- tests/performance/io_uring.xml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/performance/io_uring.xml b/tests/performance/io_uring.xml index 59674894aae..7384c0068bb 100644 --- a/tests/performance/io_uring.xml +++ b/tests/performance/io_uring.xml @@ -3,11 +3,5 @@ io_uring - CREATE TABLE hits_none (WatchID UInt64 CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() - INSERT INTO hits_none SELECT WatchID FROM test.hits - OPTIMIZE TABLE hits_none FINAL - - - - DROP TABLE hits_none + From e9d2b54de67868bb1249511809e8b0f768547b63 Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Wed, 15 May 2024 18:17:50 +0200 Subject: [PATCH 251/651] Reformat test file --- .../test_case.py | 56 +++++++++++++------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/tests/integration/test_host_resolver_fail_count/test_case.py b/tests/integration/test_host_resolver_fail_count/test_case.py index 895d764a268..fa23ecf3e74 100644 --- a/tests/integration/test_host_resolver_fail_count/test_case.py +++ b/tests/integration/test_host_resolver_fail_count/test_case.py @@ -11,6 +11,7 @@ node = cluster.add_instance( with_minio=True, ) + @pytest.fixture(scope="module") def start_cluster(): try: @@ -20,6 +21,7 @@ def start_cluster(): finally: cluster.shutdown() + # The same value as in ClickHouse, this can't be confugured via config now DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2*60 @@ -27,31 +29,39 @@ DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2*60 def test_host_resolver(start_cluster): minio_ip = cluster.get_instance_ip("minio1") - # drop DNS cache - node.set_hosts([ - (minio_ip, "minio1"), - (node.ip_address, "minio1"), # no answer on 9001 port on this IP - ]) + # drop DNS cache + node.set_hosts( + [ + (minio_ip, "minio1"), + (node.ip_address, "minio1"), # no answer on 9001 port on this IP + ] + ) node.query("SYSTEM DROP DNS CACHE") node.query("SYSTEM DROP CONNECTIONS CACHE") - node.query(""" + node.query( + """ CREATE TABLE test (key UInt32, value UInt32) Engine=MergeTree() ORDER BY key PARTITION BY key SETTINGS storage_policy='s3' - """) + """ + ) initial_fails = "0\n" k = 0 limit = 100 while initial_fails == "0\n": - node.query(f""" + node.query( + f""" INSERT INTO test VALUES (0,{k}) - """) + """ + ) # HostResolver chooses IP randomly, so on single call can choose worked ID - initial_fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'") + initial_fails = node.query( + "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" + ) k += 1 if k >= limit: # Dead IP was not choosen for 100 iteration. 
@@ -63,10 +73,14 @@ def test_host_resolver(start_cluster): for j in range(10): for i in range(10): - node.query(f""" + node.query( + f""" INSERT INTO test VALUES ({i+1},{j+1}) - """) - fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'") + """ + ) + fails = node.query( + "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" + ) assert fails == initial_fails # Check that clickhouse tries to recheck IP after 2 minutes @@ -75,9 +89,11 @@ def test_host_resolver(start_cluster): intermediate_fails = initial_fails limit = k + 100 while intermediate_fails == initial_fails: - node.query(f""" + node.query( + f""" INSERT INTO test VALUES (101,{k}) - """) + """ + ) intermediate_fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'") k += 1 if k >= limit: @@ -92,10 +108,14 @@ def test_host_resolver(start_cluster): initial_fails = intermediate_fails limit = k + 100 while intermediate_fails == initial_fails: - node.query(f""" + node.query( + f""" INSERT INTO test VALUES (102,{k}) - """) - intermediate_fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'") + """ + ) + intermediate_fails = node.query( + "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" + ) k += 1 if k >= limit: break From 7f05ccaddf8c80c1c9d76d2a818c1751681c714b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 15 May 2024 16:20:42 +0000 Subject: [PATCH 252/651] Update lambda execution name. Fix query tree hash calculation in case of empty database. --- src/Planner/PlannerActionsVisitor.cpp | 20 +++++++++++++++++-- ...tion_memory_efficient_mix_levels.reference | 11 ++++++++++ ...ggregation_memory_efficient_mix_levels.sql | 2 ++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 2b369eaa593..49a9305b3ce 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -243,8 +243,24 @@ public: } case QueryTreeNodeType::LAMBDA: { - auto lambda_hash = node->getTreeHash(); - result = "__lambda_" + toString(lambda_hash); + WriteBufferFromOwnString buffer; + + const auto & lambda_node = node->as(); + const auto & lambda_arguments_nodes = lambda_node.getArguments().getNodes(); + + size_t lambda_arguments_nodes_size = lambda_arguments_nodes.size(); + for (size_t i = 0; i < lambda_arguments_nodes_size; ++i) + { + const auto & lambda_arguments_node = lambda_arguments_nodes[i]; + buffer << calculateActionNodeName(lambda_arguments_node); + + if (i + 1 != lambda_arguments_nodes_size) + buffer << ", "; + } + + buffer << " -> " << calculateActionNodeName(lambda_node.getExpression()); + + result = buffer.str(); break; } default: diff --git a/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.reference b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.reference index ac13b3f193e..1d1e81fa5e2 100644 --- a/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.reference +++ b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.reference @@ -8,3 +8,14 @@ 7 1 8 1 9 1 +[0] +[0] +[1] +[2] +[3] +[4] +[5] +[6] +[7] +[8] +[9] diff --git a/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql index 
e70652877e0..aef1d22372d 100644
--- a/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql
+++ b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql
@@ -23,6 +23,8 @@ set max_bytes_before_external_group_by = 16;

 select x, count() from ma_dist group by x order by x;

+select arrayFilter(y -> y = x, [x]) as f from ma_dist order by f;
+
 drop table if exists shard_0.shard_01231_distributed_aggregation_memory_efficient;
 drop table if exists shard_1.shard_01231_distributed_aggregation_memory_efficient;

From 94041d193cc1d5d2c7b20042a189b2102a274d66 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 15 May 2024 18:22:59 +0200
Subject: [PATCH 253/651] Fix SIGSEGV due to CPU/Real profiler

The problem was caused by incorrect unwinding from signal handlers, which
led to incorrect DWARF (FDE/CIE) interpretation.

After this patch I was not able to reproduce the crash for a couple of hours,
while before it was very stable (I've reduced the minimal threshold for
query_profiler_real_time_period_ns), using simply:

    $ clickhouse-benchmark --port 19000 -q "SELECT * FROM remote('127.{1..10}', system, one)" --query_profiler_real_time_period_ns=1

Note, I'm using remote() here because it uses fibers, whose stacks have guard
pages, which helps to reproduce the crash faster.

P.S. I also have another implementation of this fix, which does not patch
unwind and instead uses info from the signal context directly. Even though it
is better (you don't need to skip extra frames and can use all 45 frames for
something useful), it is too complex, so let's go with the simpler patch
first; I think it could even be backported.

Signed-off-by: Azat Khuzhin
---
 contrib/libunwind | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/libunwind b/contrib/libunwind
index 40d8eadf96b..ba533a7246a 160000
--- a/contrib/libunwind
+++ b/contrib/libunwind
@@ -1 +1 @@
-Subproject commit 40d8eadf96b127d9b22d53ce7a4fc52aaedea965
+Subproject commit ba533a7246a2686b0552061809612f503804d26b

From ea52b25dcb451b5059cf9e4c9136c2f328340b90 Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Wed, 15 May 2024 18:49:45 +0200
Subject: [PATCH 254/651] Update aspell-dict.txt

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index a69ca0fb644..8f43c533fce 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -53,6 +53,8 @@ AutoFDO
 AutoML
 Autocompletion
 AvroConfluent
+analysisOfVariance
+ANOVA
 BIGINT
 BIGSERIAL
 BORO

From d4735cb1cedde1202aef1523f01a0260d40c6c91 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov
Date: Wed, 15 May 2024 19:17:53 +0200
Subject: [PATCH 255/651] Add ErrorCodes

---
 programs/keeper-client/Commands.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp
index 221da4fe976..7d4500fedbb 100644
--- a/programs/keeper-client/Commands.cpp
+++ b/programs/keeper-client/Commands.cpp
@@ -8,6 +8,11 @@
 namespace DB
 {

+namespace ErrorCodes
+{
+    extern const int KEEPER_EXCEPTION;
+}
+
 bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const
 {
     String path;

From 047dd03f430527ffbe84980eeda8656dc7e372d6 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel
<48961922+Avogar@users.noreply.github.com> Date: Wed, 15 May 2024 19:27:15 +0200 Subject: [PATCH 256/651] Fix special build --- src/Columns/ColumnDynamic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index c6626433877..40e8e350733 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -137,7 +137,7 @@ public: void insertData(const char * pos, size_t length) override { - return variant_column->insertData(pos, length); + variant_column->insertData(pos, length); } void insert(const Field & x) override; From 61acdc2bcb3d98c949748b15c29dcc58d0d73dce Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 15 May 2024 17:38:33 +0000 Subject: [PATCH 257/651] Fix test. --- src/Planner/PlannerActionsVisitor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 49a9305b3ce..c2ab1a99910 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -251,8 +251,10 @@ public: size_t lambda_arguments_nodes_size = lambda_arguments_nodes.size(); for (size_t i = 0; i < lambda_arguments_nodes_size; ++i) { - const auto & lambda_arguments_node = lambda_arguments_nodes[i]; - buffer << calculateActionNodeName(lambda_arguments_node); + const auto & lambda_argument_node = lambda_arguments_nodes[i]; + buffer << calculateActionNodeName(lambda_argument_node); + buffer << ' '; + buffer << lambda_argument_node->as().getResultType()->getName(); if (i + 1 != lambda_arguments_nodes_size) buffer << ", "; From 5102c6eff00436036cebaa2ba5c2ac304a7f52a0 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 15 May 2024 18:48:29 +0100 Subject: [PATCH 258/651] impl impl impl --- tests/ci/stress.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index a9f5916464d..86cbe503796 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -75,6 +75,9 @@ def get_options(i: int, upgrade_check: bool) -> str: if not upgrade_check: client_options.append("ignore_drop_queries_probability=0.5") + if random.random() < 0.2: + client_options.append("allow_experimental_parallel_reading_from_replicas=1") + if client_options: options.append(" --client-option " + " ".join(client_options)) From 0daf4065eb254d9f8dc5dc511386c7074408a390 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 15 May 2024 19:28:05 +0100 Subject: [PATCH 259/651] fix --- tests/ci/stress.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 86cbe503796..8b8b17dd2a9 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -77,6 +77,9 @@ def get_options(i: int, upgrade_check: bool) -> str: if random.random() < 0.2: client_options.append("allow_experimental_parallel_reading_from_replicas=1") + client_options.append("max_parallel_replicas=3") + client_options.append("cluster_for_parallel_replicas='parallel_replicas'") + client_options.append("parallel_replicas_for_non_replicated_merge_tree=1") if client_options: options.append(" --client-option " + " ".join(client_options)) From 4cfe2665de328a7a7921e0b0a76ddf9b4e2d5486 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 15 May 2024 20:28:17 +0200 Subject: [PATCH 260/651] Update src/Formats/FormatSettings.h --- src/Formats/FormatSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/FormatSettings.h 
b/src/Formats/FormatSettings.h
index 004b16b6061..bf3269bd42d 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -363,7 +363,7 @@ struct FormatSettings
         bool skip_trailing_empty_lines = false;
         bool allow_variable_number_of_columns = false;
         bool crlf_end_of_line_input = false;
-    } tsv;
+    } tsv{};

     struct
     {

From c13a0cae814b9a3e9b1fb6740006e626c2b34e0a Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 15 May 2024 20:33:41 +0200
Subject: [PATCH 261/651] fix another bug

---
 src/Interpreters/DDLWorker.cpp | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 8afe92c31cd..fb461146906 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -793,6 +793,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
     bool executed_by_us = false;
     bool executed_by_other_leader = false;

+    bool extra_attempt_for_replicated_database = false;
+
     /// Defensive programming. One hour is more than enough to execute almost all DDL queries.
     /// If it will be very long query like ALTER DELETE for a huge table it's still will be executed,
     /// but DDL worker can continue processing other queries.
@@ -837,7 +839,14 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
             /// Checking and incrementing counter exclusively.
             size_t counter = parse(zookeeper->get(tries_to_execute_path));
             if (counter > MAX_TRIES_TO_EXECUTE)
-                break;
+            {
+                /// Replicated databases have their own retries, limiting retries here would break outer retries
+                bool is_replicated_database_task = dynamic_cast(&task);
+                if (is_replicated_database_task)
+                    extra_attempt_for_replicated_database = true;
+                else
+                    break;
+            }

             zookeeper->set(tries_to_execute_path, toString(counter + 1));

@@ -851,6 +860,8 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
                 executed_by_us = true;
                 break;
             }
+            else if (extra_attempt_for_replicated_database)
+                break;
         }

         /// Waiting for someone who will execute query and change is_executed_path node
@@ -894,7 +905,9 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
         else /// If we exceeded amount of tries
         {
             LOG_WARNING(log, "Task {} was not executed by anyone, maximum number of retries exceeded", task.entry_name);
-            task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, maximum retries exceeded");
+            bool keep_original_error = extra_attempt_for_replicated_database && task.execution_status.code;
+            if (!keep_original_error)
+                task.execution_status = ExecutionStatus(ErrorCodes::UNFINISHED, "Cannot execute replicated DDL query, maximum retries exceeded");
         }
         return false;
     }
@@ -1191,8 +1204,12 @@ void DDLWorker::runMainThread()
             }

             LOG_ERROR(log, "Unexpected error ({} times in a row), will try to restart main thread: {}", subsequent_errors_count, message);
-            reset_state();
+
+            /// Sleep before retrying
             sleepForSeconds(5);
+            /// Reset state after sleeping, so DatabaseReplicated::canExecuteReplicatedMetadataAlter()
+            /// will have a chance even when the database got stuck in infinite retries
+            reset_state();
         }
     }
 }

From 9d909618a8a73a0186533ff66ddd8c90067839d2 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Wed, 15 May 2024 19:55:45 +0100
Subject: [PATCH 262/651] fix

---
 src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
index
7d9691b847d..d1fd141f43b 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -440,7 +440,7 @@ void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnno ++sent_initial_requests; LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count); - if (sent_initial_requests == replicas_count) + if (sent_initial_requests == replicas_count - unavailable_replicas_count) setProgressCallback(); /// Sift the queue to move out all invisible segments From 78a9731734df5414d9fbabce1e9d88f4720ed99a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 15 May 2024 20:38:35 +0100 Subject: [PATCH 263/651] impl --- .../ParallelReplicasReadingCoordinator.cpp | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index d1fd141f43b..ec849b65c1a 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -167,6 +167,7 @@ public: Stats stats; size_t replicas_count{0}; size_t unavailable_replicas_count{0}; + size_t sent_initial_requests{0}; ProgressCallback progress_callback; explicit ImplInterface(size_t replicas_count_) @@ -177,9 +178,17 @@ public: virtual ~ImplInterface() = default; virtual ParallelReadResponse handleRequest(ParallelReadRequest request) = 0; - virtual void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) = 0; + virtual void doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) = 0; virtual void markReplicaAsUnavailable(size_t replica_number) = 0; + void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) + { + if (++sent_initial_requests > replicas_count) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Initiator received more initial requests than there are replicas"); + + doHandleInitialAllRangesAnnouncement(std::move(announcement)); + } + void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } }; @@ -215,7 +224,7 @@ public: ParallelReadResponse handleRequest(ParallelReadRequest request) override; - void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override; + void doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override; void markReplicaAsUnavailable(size_t replica_number) override; @@ -223,7 +232,6 @@ private: /// This many granules will represent a single segment of marks that will be assigned to a replica const size_t mark_segment_size{0}; - size_t sent_initial_requests{0}; bool state_initialized{false}; size_t finished_replicas{0}; @@ -422,7 +430,7 @@ void DefaultCoordinator::setProgressCallback() } } -void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) +void DefaultCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) { const auto replica_num = announcement.replica_num; @@ -437,7 +445,6 @@ void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnno ++stats[replica_num].number_of_requests; replica_status[replica_num].is_announcement_received = true; - ++sent_initial_requests; LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count); if (sent_initial_requests == 
replicas_count - unavailable_replicas_count) @@ -781,6 +788,11 @@ ParallelReadResponse DefaultCoordinator::handleRequest(ParallelReadRequest reque { /// Nobody will come to process any more data + for (const auto & part : all_parts_to_read) + if (!part.description.ranges.empty()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Some segments were left unread for the part {}", part.description.describe()); + if (!ranges_for_stealing_queue.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Some orphaned segments were left unread"); @@ -818,7 +830,7 @@ public: } ParallelReadResponse handleRequest([[ maybe_unused ]] ParallelReadRequest request) override; - void handleInitialAllRangesAnnouncement([[ maybe_unused ]] InitialAllRangesAnnouncement announcement) override; + void doHandleInitialAllRangesAnnouncement([[maybe_unused]] InitialAllRangesAnnouncement announcement) override; void markReplicaAsUnavailable(size_t replica_number) override; Parts all_parts_to_read; @@ -840,7 +852,7 @@ void InOrderCoordinator::markReplicaAsUnavailable(size_t replica_number) } template -void InOrderCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) +void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) { LOG_TRACE(log, "Received an announcement {}", announcement.describe()); @@ -1051,7 +1063,10 @@ ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t re { } -ParallelReplicasReadingCoordinator::~ParallelReplicasReadingCoordinator() = default; +ParallelReplicasReadingCoordinator::~ParallelReplicasReadingCoordinator() +{ + chassert(pimpl); +} void ParallelReplicasReadingCoordinator::setProgressCallback(ProgressCallback callback) { From 04fb84d4ade10df2a4fc9f6cb6f94ac4993d1ffd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 15 May 2024 21:57:15 +0200 Subject: [PATCH 264/651] Update src/Core/SettingsChangesHistory.h Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Core/SettingsChangesHistory.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 6edfcc129f8..e004e83355b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -91,13 +91,13 @@ static std::map sett {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"hdfs_throw_on_zero_files_match", false, false, "Throw an error, when ListObjects request cannot match any files"}, - {"azure_throw_on_zero_files_match", false, false, "Throw an error, when ListObjects request cannot match any files"}, - {"s3_validate_request_settings", true, true, "Validate S3 request settings"}, + {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, + {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, + {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure 
table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS"}, - {"azure_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageAzureBlob"}, - {"s3_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageS3"}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, + {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, + {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, }}, From a63e846724f503607fe38b34fda37345ee8111c5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 May 2024 22:13:48 +0200 Subject: [PATCH 265/651] Review fixes --- docs/en/operations/settings/settings.md | 20 +++++++++++++++++++ .../StorageObjectStorageSink.cpp | 2 +- .../ObjectStorage/StorageObjectStorageSink.h | 2 +- src/Storages/S3Queue/S3QueueSource.cpp | 4 ++-- .../TableFunctionObjectStorage.cpp | 5 ++--- .../TableFunctionObjectStorage.h | 10 ++++++++-- .../TableFunctionObjectStorageCluster.h | 19 ++++++++++-------- 7 files changed, 45 insertions(+), 17 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1772a3aa861..3a79eb64c67 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3675,6 +3675,16 @@ Possible values: Default value: `0`. +## s3_validate_request_settings {#s3_validate_request_settings} + +Enables s3 request settings validation. + +Possible values: +- 1 — validate settings. +- 0 — do not validate settings. + +Default value: `1`. + ## hdfs_truncate_on_insert {#hdfs_truncate_on_insert} Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. @@ -3747,6 +3757,16 @@ Possible values: Default value: `0`. +## azure_skip_empty_files {#azure_skip_empty_files} + +Enables or disables skipping empty files in S3 engine. + +Possible values: +- 0 — `SELECT` throws an exception if empty file is not compatible with requested format. +- 1 — `SELECT` returns empty result for empty file. + +Default value: `0`. + ## engine_url_skip_empty_files {#engine_url_skip_empty_files} Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables. 
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 81bdeaa43a3..0a3cf19a590 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -15,7 +15,7 @@ namespace ErrorCodes StorageObjectStorageSink::StorageObjectStorageSink( ObjectStoragePtr object_storage, ConfigurationPtr configuration, - std::optional format_settings_, + const std::optional & format_settings_, const Block & sample_block_, ContextPtr context, const std::string & blob_path) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index a3c8ef68cf0..45cf83d606f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -13,7 +13,7 @@ public: StorageObjectStorageSink( ObjectStoragePtr object_storage, ConfigurationPtr configuration, - std::optional format_settings_, + const std::optional & format_settings_, const Block & sample_block_, ContextPtr context, const std::string & blob_path = ""); diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 0cee94769c4..458f681d7b5 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -235,7 +235,7 @@ Chunk StorageS3QueueSource::generate() catch (...) { LOG_ERROR(log, "Failed to set file {} as failed: {}", - key_with_info->key, getCurrentExceptionMessage(true)); + key_with_info->relative_path, getCurrentExceptionMessage(true)); } appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); @@ -262,7 +262,7 @@ Chunk StorageS3QueueSource::generate() catch (...) 
{ LOG_ERROR(log, "Failed to set file {} as failed: {}", - key_with_info->key, getCurrentExceptionMessage(true)); + key_with_info->relative_path, getCurrentExceptionMessage(true)); } appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 9f16a9a0b25..550d9cc799b 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -116,9 +116,8 @@ StoragePtr TableFunctionObjectStorage::executeImpl( columns, ConstraintsDescription{}, String{}, - /// No format_settings for table function Azure - std::nullopt, - /* distributed_processing */ false, + /* format_settings */std::nullopt, + /* distributed_processing */false, nullptr); storage->startup(); diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index bbc40cc6191..86b8f0d5e14 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -32,6 +32,7 @@ struct AzureDefinition " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; + static constexpr auto max_number_of_arguments = 8; }; struct S3Definition @@ -51,6 +52,7 @@ struct S3Definition " - url, access_key_id, secret_access_key, format, structure, compression_method\n" " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + static constexpr auto max_number_of_arguments = 8; }; struct GCSDefinition @@ -58,6 +60,7 @@ struct GCSDefinition static constexpr auto name = "gcs"; static constexpr auto storage_type_name = "GCS"; static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; }; struct COSNDefinition @@ -65,6 +68,7 @@ struct COSNDefinition static constexpr auto name = "cosn"; static constexpr auto storage_type_name = "COSN"; static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; }; struct OSSDefinition @@ -72,6 +76,7 @@ struct OSSDefinition static constexpr auto name = "oss"; static constexpr auto storage_type_name = "OSS"; static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; }; struct HDFSDefinition @@ -82,6 +87,7 @@ struct HDFSDefinition " - uri, format\n" " - uri, format, structure\n" " - uri, format, structure, compression_method\n"; + static constexpr auto max_number_of_arguments = 4; }; template @@ -91,7 +97,7 @@ public: static constexpr auto name = Definition::name; static constexpr auto signature = Definition::signature; - static size_t getMaxNumberOfArguments() { return 8; } + static size_t getMaxNumberOfArguments() { return Definition::max_number_of_arguments; } String getName() const override { return name; } @@ -105,7 +111,7 @@ public: bool supportsReadingSubsetOfColumns(const ContextPtr & context) override { - return 
FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context);
+        return configuration->format != "auto" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context);
     }

     std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override

diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h
index 76786fafe99..296791b8bda 100644
--- a/src/TableFunctions/TableFunctionObjectStorageCluster.h
+++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h
@@ -17,17 +17,10 @@ class StorageAzureConfiguration;

 struct AzureClusterDefinition
 {
-    /**
-     * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure)
-     * A table function, which allows to process many files from Azure Blob Storage on a specific cluster
-     * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks
-     * in Azure Blob Storage file path and dispatch each file dynamically.
-     * On worker node it asks initiator about next task to process, processes it.
-     * This is repeated until the tasks are finished.
-     */
     static constexpr auto name = "azureBlobStorageCluster";
     static constexpr auto storage_type_name = "AzureBlobStorageCluster";
     static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]";
+    static constexpr auto max_number_of_arguments = AzureDefinition::max_number_of_arguments + 1;
 };

 struct S3ClusterDefinition
@@ -44,6 +37,7 @@ struct S3ClusterDefinition
     " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n"
     " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n"
     "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)";
+    static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments + 1;
 };

 struct HDFSClusterDefinition
@@ -54,8 +48,17 @@ struct HDFSClusterDefinition
     " - cluster_name, uri, format\n"
     " - cluster_name, uri, format, structure\n"
     " - cluster_name, uri, format, structure, compression_method\n";
+    static constexpr auto max_number_of_arguments = HDFSDefinition::max_number_of_arguments + 1;
 };

+/**
+* Class implementing s3/hdfs/azureBlobStorageCluster(...) table functions,
+* which allow processing many files from S3/HDFS/Azure blob storage on a specific cluster.
+* On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks
+* in file path and dispatches each file dynamically.
+* On worker node it asks initiator about next task to process, processes it.
+* This is repeated until the tasks are finished.
+*/ template class TableFunctionObjectStorageCluster : public ITableFunctionCluster> { From f19615788bf05be3440cddf552d0bf51e33cbc5c Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 15 May 2024 22:37:33 +0000 Subject: [PATCH 266/651] Fix special build --- src/Columns/ColumnDynamic.cpp | 6 +++--- src/DataTypes/DataTypeDynamic.cpp | 2 +- src/DataTypes/Serializations/SerializationDynamic.h | 2 +- src/Parsers/ParserDataType.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 0f247638d92..d63a03dbafd 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -227,7 +227,7 @@ void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) auto & variant_col = assert_cast(*variant_column); /// If variants are different, we need to extend our variant with new variants. - if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) { variant_col.insertFrom(*dynamic_src.variant_column, n, *global_discriminators_mapping); return; @@ -281,7 +281,7 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size auto & variant_col = assert_cast(*variant_column); /// If variants are different, we need to extend our variant with new variants. - if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) { variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping); return; @@ -443,7 +443,7 @@ void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, si auto & variant_col = assert_cast(*variant_column); /// If variants are different, we need to extend our variant with new variants. - if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) { variant_col.insertManyFrom(*dynamic_src.variant_column, position, length, *global_discriminators_mapping); return; diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index 2c6b3eba906..c920e69c13b 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -67,7 +67,7 @@ static DataTypePtr create(const ASTPtr & arguments) if (identifier_name != "max_types") throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected identifier: {}. 
Dynamic data type argument should be in a form 'max_types=N'", identifier_name); - auto literal = argument->arguments->children[1]->as(); + auto * literal = argument->arguments->children[1]->as(); if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > 255) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h index 7471ff54cf7..001a3cf87ce 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.h +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -11,7 +11,7 @@ class SerializationDynamicElement; class SerializationDynamic : public ISerialization { public: - SerializationDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) + explicit SerializationDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) { } diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index c88b5e0e3a2..78d62456fcf 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -55,7 +55,7 @@ private: class ParserDataTypeArgument : public IParserBase { public: - ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) + explicit ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) { } From 18b8d302b2eca62e67feb2d5ad5b91429c73ab24 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 16 May 2024 00:37:38 +0200 Subject: [PATCH 267/651] make tests great again --- src/Storages/StorageReplicatedMergeTree.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index dc23f080013..df5bbdf9f78 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1693,6 +1693,9 @@ bool StorageReplicatedMergeTree::checkPartsImpl(bool skip_sanity_checks) UInt64 total_rows_on_filesystem = 0; for (const auto & part : parts) total_rows_on_filesystem += part->rows_count; + /// We need to sum the rows count of all unexpected data parts; + for (const auto & part : unexpected_data_parts) + total_rows_on_filesystem += part.part->rows_count; const auto storage_settings_ptr = getSettings(); bool insane = uncovered_unexpected_parts_rows > total_rows_on_filesystem * storage_settings_ptr->replicated_max_ratio_of_wrong_parts; From 91c240c85c4c9179f253806f6f0f62e5b34edd5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 16 May 2024 00:43:38 +0200 Subject: [PATCH 268/651] Update Datasketches --- contrib/datasketches-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/datasketches-cpp b/contrib/datasketches-cpp index c3abaaefe5f..836b87e3131 160000 --- a/contrib/datasketches-cpp +++ b/contrib/datasketches-cpp @@ -1 +1 @@ -Subproject commit c3abaaefe5fa400eed99e082af07c1b61a7144db +Subproject commit 836b87e313107506e15ce1f8d50241be56652e58 From f9c1c719b9e243119313a21499da47b615ff4c2e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 16 May 2024 00:04:28 +0000 Subject: [PATCH 269/651] fix explain current transaction --- src/Interpreters/executeQuery.cpp | 8 +++++--- .../03155_explain_current_transaction.reference | 1 + .../0_stateless/03155_explain_current_transaction.sql | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 
tests/queries/0_stateless/03155_explain_current_transaction.reference create mode 100644 tests/queries/0_stateless/03155_explain_current_transaction.sql diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 33a4cf2a74c..f1f72a4ea4a 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -808,12 +808,14 @@ static std::tuple executeQueryImpl( bool is_create_parameterized_view = false; if (const auto * create_query = ast->as()) + { is_create_parameterized_view = create_query->isParameterizedView(); + } else if (const auto * explain_query = ast->as()) { - assert(!explain_query->children.empty()); - if (const auto * create_of_explain_query = explain_query->children[0]->as()) - is_create_parameterized_view = create_of_explain_query->isParameterizedView(); + if (!explain_query->children.empty()) + if (const auto * create_of_explain_query = explain_query->children[0]->as()) + is_create_parameterized_view = create_of_explain_query->isParameterizedView(); } /// Replace ASTQueryParameter with ASTLiteral for prepared statements. diff --git a/tests/queries/0_stateless/03155_explain_current_transaction.reference b/tests/queries/0_stateless/03155_explain_current_transaction.reference new file mode 100644 index 00000000000..76f7af4a4ff --- /dev/null +++ b/tests/queries/0_stateless/03155_explain_current_transaction.reference @@ -0,0 +1 @@ + diff --git a/tests/queries/0_stateless/03155_explain_current_transaction.sql b/tests/queries/0_stateless/03155_explain_current_transaction.sql new file mode 100644 index 00000000000..fa0fd06e798 --- /dev/null +++ b/tests/queries/0_stateless/03155_explain_current_transaction.sql @@ -0,0 +1 @@ +EXPLAIN CURRENT TRANSACTION; From df90cb79b8923154a027417d9827b0cccc6310d5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 16 May 2024 06:48:24 +0000 Subject: [PATCH 270/651] process nested subqueries --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f0a3a2c74b6..5359ac8b97e 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5615,9 +5615,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi /// Replace storage with values storage of insertion block if (StoragePtr storage = scope.context->getViewSource()) { - if (auto * query_node = in_second_argument->as()) + QueryTreeNodePtr table_expression; + /// Don't even ask... there's turtles all the way down... 
+ for (auto * query_node = in_second_argument->as(); query_node; query_node = table_expression->as()) + table_expression = extractLeftTableExpression(query_node->getJoinTree()); + + if (table_expression) { - auto table_expression = extractLeftTableExpression(query_node->getJoinTree()); if (auto * query_table_node = table_expression->as()) { if (query_table_node->getStorageID().getFullNameNotQuoted() == storage->getStorageID().getFullNameNotQuoted()) From 8b87ec301e8af9b3b1cde089fe80c7f9eadee0e4 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 16 May 2024 10:36:04 +0200 Subject: [PATCH 271/651] Skip whitespaces before optional path arg --- programs/keeper-client/Commands.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 7d4500fedbb..ec5eaf5070c 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -221,6 +221,8 @@ bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr & node->args.push_back(threshold->as().value); + ParserToken{TokenType::Whitespace}.ignore(pos); + String path; if (!parseKeeperPath(pos, expected, path)) path = "."; From 0bd4f55241a2c7466ad40c6f1299c34a9abcae3c Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 16 May 2024 10:36:57 +0200 Subject: [PATCH 272/651] Test for find_super_nodes and find_big_family --- ...3135_keeper_client_find_commands.reference | 7 +++++ .../03135_keeper_client_find_commands.sh | 29 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 tests/queries/0_stateless/03135_keeper_client_find_commands.reference create mode 100755 tests/queries/0_stateless/03135_keeper_client_find_commands.sh diff --git a/tests/queries/0_stateless/03135_keeper_client_find_commands.reference b/tests/queries/0_stateless/03135_keeper_client_find_commands.reference new file mode 100644 index 00000000000..3a0f080674f --- /dev/null +++ b/tests/queries/0_stateless/03135_keeper_client_find_commands.reference @@ -0,0 +1,7 @@ +find_super_nodes +/test-keeper-client-default/1 4 +/test-keeper-client-default/1/d 3 +find_big_family +/test-keeper-client-default 10 +/test-keeper-client-default/1 9 +/test-keeper-client-default/1/d 4 diff --git a/tests/queries/0_stateless/03135_keeper_client_find_commands.sh b/tests/queries/0_stateless/03135_keeper_client_find_commands.sh new file mode 100755 index 00000000000..0acc4014f1f --- /dev/null +++ b/tests/queries/0_stateless/03135_keeper_client_find_commands.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +path="/test-keeper-client-$CLICKHOUSE_DATABASE" + +$CLICKHOUSE_KEEPER_CLIENT -q "rm $path" >& /dev/null + +$CLICKHOUSE_KEEPER_CLIENT -q "create $path 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/a 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/a/a 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/b 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/c 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/a 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/b 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/c 'foobar'" + +echo 'find_super_nodes' +$CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 1000000000" +$CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 3 $path" + +echo 'find_big_family' +$CLICKHOUSE_KEEPER_CLIENT -q "find_big_family $path 3" + +$CLICKHOUSE_KEEPER_CLIENT -q "rmr $path" From 81a7862ed426b2091bf1a1ad0ca78053c20b4df9 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Thu, 16 May 2024 07:51:21 +0000 Subject: [PATCH 273/651] Allow certain ALTER TABLE commands for plain_rewritable Currently all alters are disallowed, this is too restrictive. Allow altering settings (through {MODIFY|RESET} SETTING) and comments (MODIFY COMMENT). --- src/Storages/MergeTree/MergeTreeData.cpp | 7 +++-- .../test_s3_plain_rewritable/test.py | 30 +++++++++++++++++++ .../03008_local_plain_rewritable.reference | 2 ++ .../03008_local_plain_rewritable.sh | 20 ++++++++++++- 4 files changed, 56 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fcbb9bb7208..05bc09f115f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3013,8 +3013,11 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context "Experimental full-text index feature is not enabled (turn on setting 'allow_experimental_inverted_index')"); for (const auto & disk : getDisks()) - if (!disk->supportsHardLinks()) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ALTER TABLE is not supported for immutable disk '{}'", disk->getName()); + if (!disk->supportsHardLinks() && !commands.isSettingsAlter() && !commands.isCommentAlter()) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "ALTER TABLE commands are not supported on immutable disk '{}', except for setting and comment alteration", + disk->getName()); /// Set of columns that shouldn't be altered. 
NameSet columns_alter_type_forbidden; diff --git a/tests/integration/test_s3_plain_rewritable/test.py b/tests/integration/test_s3_plain_rewritable/test.py index 51786c55dab..67e3ec987a9 100644 --- a/tests/integration/test_s3_plain_rewritable/test.py +++ b/tests/integration/test_s3_plain_rewritable/test.py @@ -80,6 +80,36 @@ def test_insert(): == insert_values_arr[i] ) + for i in range(NUM_WORKERS): + nodes[i].query("ALTER TABLE test MODIFY SETTING old_parts_lifetime = 59") + assert ( + nodes[i] + .query( + "SELECT engine_full from system.tables WHERE database = currentDatabase() AND name = 'test'" + ) + .find("old_parts_lifetime = 59") + != -1 + ) + + nodes[i].query("ALTER TABLE test RESET SETTING old_parts_lifetime") + assert ( + nodes[i] + .query( + "SELECT engine_full from system.tables WHERE database = currentDatabase() AND name = 'test'" + ) + .find("old_parts_lifetime") + == -1 + ) + nodes[i].query("ALTER TABLE test MODIFY COMMENT 'new description'") + assert ( + nodes[i] + .query( + "SELECT comment from system.tables WHERE database = currentDatabase() AND name = 'test'" + ) + .find("new description") + != -1 + ) + @pytest.mark.order(1) def test_restart(): diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.reference b/tests/queries/0_stateless/03008_local_plain_rewritable.reference index 10fc932ca4d..f69e11a3ca3 100644 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.reference +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.reference @@ -9,6 +9,7 @@ 4 4 4 4 7 7 5 5 5 +1 10006 0 0 0 1 1 1 @@ -20,3 +21,4 @@ 4 4 4 4 7 7 5 5 5 +0 diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index 77bc6763fd8..1761c7d79b1 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -9,8 +9,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query "drop table if exists test_mt sync" ${CLICKHOUSE_CLIENT} -nm --query " -create table test_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) +create table test_mt (a Int32, b Int64, c Int64) +engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) settings disk = disk( + name = disk_s3_plain, type = object_storage, object_storage_type = local, metadata_type = plain_rewritable, @@ -29,7 +31,23 @@ select (*) from test_mt order by tuple(a, b) limit 10; ${CLICKHOUSE_CLIENT} --query "optimize table test_mt final" +${CLICKHOUSE_CLIENT} -nm --query " +alter table test_mt modify setting disk = 'disk_s3_plain', old_parts_lifetime = 3600; +select engine_full from system.tables WHERE database = currentDatabase() AND name = 'test_mt'; +" | grep -c "old_parts_lifetime = 3600" + ${CLICKHOUSE_CLIENT} -nm --query " select count(*) from test_mt; select (*) from test_mt order by tuple(a, b) limit 10; " + +${CLICKHOUSE_CLIENT} -nm --query " +alter table test_mt update c = 0 where a % 2 = 1; +alter table test_mt add column d Int64 after c; +alter table test_mt drop column c; +" 2>&1 | grep -Fq "SUPPORT_IS_DISABLED" + +${CLICKHOUSE_CLIENT} -nm --query " +truncate table test_mt; +select count(*) from test_mt; +" From 4b5f31a22cd1322c9e78306c4b4e987329bc3b95 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 16 May 2024 10:42:38 +0200 Subject: [PATCH 274/651] Fix test for big family to account for whole subtree --- tests/integration/test_keeper_client/test.py | 26 
+++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py index 9d7a46001e7..0abd6884574 100644 --- a/tests/integration/test_keeper_client/test.py +++ b/tests/integration/test_keeper_client/test.py @@ -47,24 +47,26 @@ def test_big_family(client: KeeperClient): assert response == TSV( [ - ["/test_big_family/1", "5"], - ["/test_big_family/2", "3"], - ["/test_big_family/2/3", "0"], - ["/test_big_family/2/2", "0"], - ["/test_big_family/2/1", "0"], - ["/test_big_family/1/5", "0"], - ["/test_big_family/1/4", "0"], - ["/test_big_family/1/3", "0"], - ["/test_big_family/1/2", "0"], - ["/test_big_family/1/1", "0"], + ["/test_big_family", "11"], + ["/test_big_family/1", "6"], + ["/test_big_family/2", "4"], + ["/test_big_family/2/3", "1"], + ["/test_big_family/2/2", "1"], + ["/test_big_family/2/1", "1"], + ["/test_big_family/1/5", "1"], + ["/test_big_family/1/4", "1"], + ["/test_big_family/1/3", "1"], + ["/test_big_family/1/2", "1"], + ["/test_big_family/1/1", "1"], ] ) - response = client.find_big_family("/test_big_family", 1) + response = client.find_big_family("/test_big_family", 2) assert response == TSV( [ - ["/test_big_family/1", "5"], + ["/test_big_family", "11"], + ["/test_big_family/1", "6"], ] ) From 7ee64e55ed89d1486e63997411add16427f7f917 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 16 May 2024 12:07:01 +0300 Subject: [PATCH 275/651] Recursive CTE distributed fix --- src/Interpreters/Context.cpp | 33 +++++++++---- src/Interpreters/Context.h | 3 ++ src/Processors/Sources/RecursiveCTESource.cpp | 6 +++ .../03154_recursive_cte_distributed.reference | 26 ++++++++++ .../03154_recursive_cte_distributed.sql | 48 +++++++++++++++++++ 5 files changed, 106 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/03154_recursive_cte_distributed.reference create mode 100644 tests/queries/0_stateless/03154_recursive_cte_distributed.sql diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 98d6031edca..c7fd71093b1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1607,6 +1607,21 @@ Tables Context::getExternalTables() const void Context::addExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +{ + addExternalTable(table_name, std::make_shared(std::move(temporary_table))); +} + +void Context::updateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +{ + updateExternalTable(table_name, std::make_shared(std::move(temporary_table))); +} + +void Context::addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +{ + addOrUpdateExternalTable(table_name, std::make_shared(std::move(temporary_table))); +} + +void Context::addExternalTable(const String & table_name, std::shared_ptr temporary_table) { if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); @@ -1614,34 +1629,32 @@ void Context::addExternalTable(const String & table_name, TemporaryTableHolder & std::lock_guard lock(mutex); if (external_tables_mapping.end() != external_tables_mapping.find(table_name)) throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} already exists", backQuoteIfNeed(table_name)); - external_tables_mapping.emplace(table_name, std::make_shared(std::move(temporary_table))); + + external_tables_mapping.emplace(table_name, std::move(temporary_table)); } -void 
Context::updateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +void Context::updateExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table) { if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); - auto temporary_table_ptr = std::make_shared<TemporaryTableHolder>(std::move(temporary_table)); - std::lock_guard lock(mutex); auto it = external_tables_mapping.find(table_name); if (it == external_tables_mapping.end()) throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} does not exist", backQuoteIfNeed(table_name)); - it->second = std::move(temporary_table_ptr); + + it->second = std::move(temporary_table); } -void Context::addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table) +void Context::addOrUpdateExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table) { if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); - auto temporary_table_ptr = std::make_shared<TemporaryTableHolder>(std::move(temporary_table)); - std::lock_guard lock(mutex); - auto [it, inserted] = external_tables_mapping.emplace(table_name, temporary_table_ptr); + auto [it, inserted] = external_tables_mapping.emplace(table_name, temporary_table); if (!inserted) - it->second = std::move(temporary_table_ptr); + it->second = std::move(temporary_table); } std::shared_ptr<TemporaryTableHolder> Context::findExternalTable(const String & table_name) const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index a6ab503430b..190b3840124 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -685,6 +685,9 @@ public: void addExternalTable(const String & table_name, TemporaryTableHolder && temporary_table); void updateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table); void addOrUpdateExternalTable(const String & table_name, TemporaryTableHolder && temporary_table); + void addExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table); + void updateExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table); + void addOrUpdateExternalTable(const String & table_name, std::shared_ptr<TemporaryTableHolder> temporary_table); std::shared_ptr<TemporaryTableHolder> findExternalTable(const String & table_name) const; std::shared_ptr<TemporaryTableHolder> removeExternalTable(const String & table_name); diff --git a/src/Processors/Sources/RecursiveCTESource.cpp b/src/Processors/Sources/RecursiveCTESource.cpp index b94cb188086..93503b45aaf 100644 --- a/src/Processors/Sources/RecursiveCTESource.cpp +++ b/src/Processors/Sources/RecursiveCTESource.cpp @@ -102,6 +102,7 @@ public: "Recursive CTE subquery {}.
Expected projection columns to have same size in recursive and non recursive subquery.", recursive_cte_union_node->formatASTForErrorMessage()); + working_temporary_table_holder = recursive_cte_table->holder; working_temporary_table_storage = recursive_cte_table->storage; intermediate_temporary_table_holder = std::make_shared<TemporaryTableHolder>( @@ -147,6 +148,7 @@ public: truncateTemporaryTable(working_temporary_table_storage); + std::swap(intermediate_temporary_table_holder, working_temporary_table_holder); std::swap(intermediate_temporary_table_storage, working_temporary_table_storage); } @@ -172,6 +174,9 @@ private: SelectQueryOptions select_query_options; select_query_options.merge_tree_enable_remove_parts_from_snapshot_optimization = false; + const auto & recursive_table_name = recursive_cte_union_node->as<UnionNode &>().getCTEName(); + recursive_query_context->addOrUpdateExternalTable(recursive_table_name, working_temporary_table_holder); + auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(query_to_execute, recursive_query_context, select_query_options); auto pipeline_builder = interpreter->buildQueryPipeline(); @@ -225,6 +230,7 @@ private: QueryTreeNodePtr recursive_query; ContextMutablePtr recursive_query_context; + TemporaryTableHolderPtr working_temporary_table_holder; StoragePtr working_temporary_table_storage; TemporaryTableHolderPtr intermediate_temporary_table_holder; diff --git a/tests/queries/0_stateless/03154_recursive_cte_distributed.reference b/tests/queries/0_stateless/03154_recursive_cte_distributed.reference new file mode 100644 index 00000000000..0dd6d31104d --- /dev/null +++ b/tests/queries/0_stateless/03154_recursive_cte_distributed.reference @@ -0,0 +1,26 @@ +a [''] 0 +b a ['a'] 0 +c a ['a'] 0 +b a ['','b'] 1 +c a ['','c'] 1 +-- +a [''] 0 +b a ['a'] 0 +c a ['a'] 0 +b a ['','b'] 1 +c a ['','c'] 1 +-- +a [''] 0 +a [''] 0 +b a ['a'] 0 +b a ['a'] 0 +c a ['a'] 0 +c a ['a'] 0 +b a ['','b'] 1 +b a ['','b'] 1 +b a ['','b'] 1 +b a ['','b'] 1 +c a ['','c'] 1 +c a ['','c'] 1 +c a ['','c'] 1 +c a ['','c'] 1 diff --git a/tests/queries/0_stateless/03154_recursive_cte_distributed.sql b/tests/queries/0_stateless/03154_recursive_cte_distributed.sql new file mode 100644 index 00000000000..b8c3356d5d6 --- /dev/null +++ b/tests/queries/0_stateless/03154_recursive_cte_distributed.sql @@ -0,0 +1,48 @@ +-- Tags: shard + +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id String, + parent_id String +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_table VALUES ('a', ''); +INSERT INTO test_table VALUES ('b', 'a'); +INSERT INTO test_table VALUES ('c', 'a'); + +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, [parent_id] AS path, toUInt64(0) AS depth + FROM test_table + UNION ALL + SELECT t.id, t.parent_id, arrayConcat(path, [t.id]) as path, depth + 1 + FROM test_table t, search_tree st + WHERE t.parent_id = st.id) +SELECT * FROM search_tree ORDER BY depth, id, parent_id; + +SELECT '--'; + +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, [parent_id] AS path, toUInt64(0) AS depth + FROM remote('127.0.0.1', currentDatabase(), test_table) + UNION ALL + SELECT t.id, t.parent_id, arrayConcat(path, [t.id]) as path, depth + 1 + FROM remote('127.0.0.1', currentDatabase(), test_table) t, search_tree st + WHERE t.parent_id = st.id) +SELECT * FROM search_tree ORDER BY depth, id, parent_id; + +SELECT '--'; + +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, [parent_id] AS path, toUInt64(0) AS depth + FROM remote('127.0.0.{1,2}', currentDatabase(), test_table)
UNION ALL + SELECT t.id, t.parent_id, arrayConcat(path, [t.id]) as path, depth + 1 + FROM remote('127.0.0.{1,2}', currentDatabase(), test_table) t, search_tree st + WHERE t.parent_id = st.id) +SELECT * FROM search_tree ORDER BY depth, id, parent_id; + +DROP TABLE test_table; From 452e45cb290b5ad5281f1a7dfe715eaadb21aae5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 16 May 2024 11:20:06 +0200 Subject: [PATCH 276/651] Update DDLWorker.cpp --- src/Interpreters/DDLWorker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index fb461146906..5639eed552e 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -1205,7 +1205,7 @@ void DDLWorker::runMainThread() LOG_ERROR(log, "Unexpected error ({} times in a row), will try to restart main thread: {}", subsequent_errors_count, message); - /// Sleep before retying + /// Sleep before retrying sleepForSeconds(5); /// Reset state after sleeping, so DatabaseReplicated::canExecuteReplicatedMetadataAlter() /// will have a chance even when the database got stuck in infinite retries From d549fd79d9d1f4eaaa8c60334bddb087c8257a18 Mon Sep 17 00:00:00 2001 From: qiangxuhui Date: Thu, 16 May 2024 09:40:34 +0000 Subject: [PATCH 277/651] Fix code: replace `LOONGARCH64` with `LoongArch64` in some files --- docs/en/development/build-cross-loongarch.md | 10 +++++----- docs/en/development/developer-instruction.md | 2 +- docs/ru/development/build-cross-loongarch.mdx | 4 ++-- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/en/development/build-cross-loongarch.md b/docs/en/development/build-cross-loongarch.md index 137b6969fd6..9ffe97d3da7 100644 --- a/docs/en/development/build-cross-loongarch.md +++ b/docs/en/development/build-cross-loongarch.md @@ -1,15 +1,15 @@ --- slug: /en/development/build-cross-loongarch sidebar_position: 70 -title: How to Build ClickHouse on Linux for LOONGARCH64 Architecture -sidebar_label: Build on Linux for LOONGARCH64 +title: How to Build ClickHouse on Linux for LoongArch64 Architecture +sidebar_label: Build on Linux for LoongArch64 --- As of writing (2024/03/15) building for loongarch considered to be highly experimental. Not all features can be enabled. -This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with LOONGARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. +This is for the case when you have a Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with LoongArch64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. -The cross-build for LOONGARCH64 is based on the [Build instructions](../development/build.md), follow them first. +The cross-build for LoongArch64 is based on the [Build instructions](../development/build.md), follow them first. ## Install Clang-18 @@ -29,4 +29,4 @@ CC=clang-18 CXX=clang++-18 cmake . -Bbuild-loongarch64 -G Ninja -DCMAKE_TOOLCHAI ninja -C build-loongarch64 ``` -The resulting binary will run only on Linux with the LOONGARCH64 CPU architecture. +The resulting binary will run only on Linux with the LoongArch64 CPU architecture.
diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index a66be04edd6..ec5760541e8 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -72,7 +72,7 @@ You can also add original ClickHouse repo address to your local repository to pu After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`. :::note -Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md), [on Linux for Linux/LOONGARCH](build-cross-loongarch.md) and so on. +Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md), [on Linux for Linux/LoongArch](build-cross-loongarch.md) and so on. ::: ## Build System {#build-system} diff --git a/docs/ru/development/build-cross-loongarch.mdx b/docs/ru/development/build-cross-loongarch.mdx index 31514f7439f..62948af38cf 100644 --- a/docs/ru/development/build-cross-loongarch.mdx +++ b/docs/ru/development/build-cross-loongarch.mdx @@ -1,8 +1,8 @@ --- slug: /ru/development/build-cross-loongarch sidebar_position: 69 -sidebar_label: Build on Linux for LOONGARCH64 -title: Build on Linux for LOONGARCH64 +sidebar_label: Build on Linux for LoongArch64 +title: Build on Linux for LoongArch64 --- import Content from '@site/docs/en/development/build-cross-loongarch.md'; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2a335cfc55a..8e8ca24d238 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -477,6 +477,7 @@ LOCALTIME LOCALTIMESTAMP LOONGARCH LONGLONG +LoongArch Levenshtein Liao LibFuzzer From a424efa348610611d7a5f18ce1e047fdc3d9c6a3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 16 May 2024 10:13:18 +0000 Subject: [PATCH 278/651] fix fuzzer --- src/Client/QueryFuzzer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 7be01686258..03730fcedaa 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -1242,8 +1242,9 @@ void QueryFuzzer::fuzz(ASTPtr & ast) } else if (auto * explain_query = typeid_cast<ASTExplainQuery *>(ast.get())) { + const auto & explained_query = explain_query->getExplainedQuery(); /// Fuzzing EXPLAIN query to SELECT query randomly - if (fuzz_rand() % 20 == 0 && explain_query->getExplainedQuery()->getQueryKind() == IAST::QueryKind::Select) + if (explained_query && explained_query->getQueryKind() == IAST::QueryKind::Select && fuzz_rand() % 20 == 0) { auto select_query = explain_query->getExplainedQuery()->clone(); fuzz(select_query); From 97e7f404c7d86ae73af1329c34a93510dce1f719 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 16 May 2024 12:40:53 +0200 Subject: [PATCH 279/651] Fix test: only top 10 by default --- tests/integration/test_keeper_client/test.py | 1 - 1 file changed, 1 deletion(-) diff --git
a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py index 0abd6884574..fbfc38ca35c 100644 --- a/tests/integration/test_keeper_client/test.py +++ b/tests/integration/test_keeper_client/test.py @@ -57,7 +57,6 @@ def test_big_family(client: KeeperClient): ["/test_big_family/1/4", "1"], ["/test_big_family/1/3", "1"], ["/test_big_family/1/2", "1"], - ["/test_big_family/1/1", "1"], ] ) From 17346ad3945bb47887229ef7dbf83ff076c46db0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 16 May 2024 10:41:19 +0000 Subject: [PATCH 280/651] Cleanup. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 37 ++++++++--------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index b176f2cd1b5..5307589b6db 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1032,6 +1032,14 @@ public: return true; } private: + void addDuplicatingAlias(const QueryTreeNodePtr & node) + { + scope.nodes_with_duplicated_aliases.emplace(node); + auto cloned_node = node->clone(); + scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); + scope.nodes_with_duplicated_aliases.emplace(cloned_node); + } + void updateAliasesIfNeeded(const QueryTreeNodePtr & node, bool is_lambda_node) { if (!node->hasAlias()) @@ -1046,41 +1054,22 @@ private: if (is_lambda_node) { if (scope.alias_name_to_expression_node->contains(alias)) - { - scope.nodes_with_duplicated_aliases.emplace(node); - auto cloned_node = node->clone(); - scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); - scope.nodes_with_duplicated_aliases.emplace(cloned_node); - } + addDuplicatingAlias(node); auto [_, inserted] = scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); if (!inserted) - { - scope.nodes_with_duplicated_aliases.emplace(node); - auto cloned_node = node->clone(); - scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); - scope.nodes_with_duplicated_aliases.emplace(cloned_node); - } + addDuplicatingAlias(node); return; } if (scope.alias_name_to_lambda_node.contains(alias)) - { - scope.nodes_with_duplicated_aliases.emplace(node); - auto cloned_node = node->clone(); - scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); - scope.nodes_with_duplicated_aliases.emplace(cloned_node); - } + addDuplicatingAlias(node); auto [_, inserted] = scope.alias_name_to_expression_node->insert(std::make_pair(alias, node)); if (!inserted) - { - scope.nodes_with_duplicated_aliases.emplace(node); - auto cloned_node = node->clone(); - scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); - scope.nodes_with_duplicated_aliases.emplace(cloned_node); - } + addDuplicatingAlias(node); + /// If node is identifier put it also in scope alias name to lambda node map if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER) scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); From a2223f6ec574383934c61ee715b33d7c74a250c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 16 May 2024 12:52:18 +0200 Subject: [PATCH 281/651] Revert "Merge pull request #63783 from yariks5s/update_settings_history" This reverts commit 1041af2be8817a72d15635e67d124bce3d664528, reversing changes made to cd90655fa5597d67f9228af944a4ac76df944c4c. 
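For context, the entry being reverted records the 24.5 default of the `output_format_pretty_preserve_border_for_multiline_string` setting. A minimal sketch of the behavior this setting family toggles (an illustrative session, not part of this commit; the real coverage lives in 03132_pretty_format_break_line.sql later in this series):

    SET output_format_pretty_preserve_border_for_multiline_string = 1;
    SELECT 'hello\nworld' AS s FORMAT PrettyCompact;
    -- With the setting on, the newline stays inside the cell: the first fragment
    -- ends with '…' and the rest continues on the next bordered row. With it
    -- off, the raw newline simply breaks the table layout.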
--- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 5f3e9ffb611..ece48620618 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -92,7 +92,7 @@ static std::map sett {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"output_format_pretty_preserve_border_for_multiline_string", 0, 1, "Applies better rendering for multiline strings."}, + {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, From 610801f1befa22f8106273bc182aeebf82e5d5f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 16 May 2024 12:55:10 +0200 Subject: [PATCH 282/651] Revert "Merge pull request #63493 from Volodyachan/fix-tabs-in-pretty-format" This reverts commit 8ed245677d447c0e55a7a9fa84739aa4b670db78, reversing changes made to 7be88cca1f42c3b0a94b337c06e1c93eb31780b2. --- .../Formats/Impl/PrettyBlockOutputFormat.cpp | 27 +++---- .../Formats/Impl/PrettyBlockOutputFormat.h | 4 +- .../Impl/PrettyCompactBlockOutputFormat.cpp | 10 +-- .../Impl/PrettySpaceBlockOutputFormat.cpp | 9 +-- .../00298_enum_width_and_cast.reference | 16 ++--- .../00730_unicode_terminal_format.reference | 18 ++--- .../03148_tabs_in_pretty_format.reference | 72 ------------------- .../03148_tabs_in_pretty_format.sql | 42 ----------- 8 files changed, 35 insertions(+), 163 deletions(-) delete mode 100644 tests/queries/0_stateless/03148_tabs_in_pretty_format.reference delete mode 100644 tests/queries/0_stateless/03148_tabs_in_pretty_format.sql diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 8e0b6df2321..41c7bfa316b 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -24,7 +24,7 @@ PrettyBlockOutputFormat::PrettyBlockOutputFormat( /// Note that number of code points is just a rough approximation of visible string width. void PrettyBlockOutputFormat::calculateWidths( const Block & header, const Chunk & chunk, - WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths, size_t table_border_width) + WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths) { size_t num_rows = std::min(chunk.getNumRows(), format_settings.pretty.max_rows); @@ -42,7 +42,7 @@ void PrettyBlockOutputFormat::calculateWidths( /// Calculate widths of all values. String serialized_value; - size_t prefix = format_settings.pretty.output_format_pretty_row_numbers ? 
row_number_width + table_border_width : table_border_width; // Tab character adjustment + size_t prefix = 2; // Tab character adjustment for (size_t i = 0; i < num_columns; ++i) { const auto & elem = header.getByPosition(i); @@ -187,7 +187,7 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind WidthsPerColumn widths; Widths max_widths; Widths name_widths; - calculateWidths(header, chunk, widths, max_widths, name_widths, 2); + calculateWidths(header, chunk, widths, max_widths, name_widths); const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ? utf8_grid_symbols : ascii_grid_symbols; @@ -321,7 +321,6 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind std::vector<String> transferred_row(num_columns); bool has_transferred_row = false; - size_t prefix = format_settings.pretty.output_format_pretty_row_numbers ? row_number_width + 2 : 2; for (size_t j = 0; j < num_columns; ++j) { @@ -335,13 +334,11 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind serializations[j]->serializeText(*columns[j], i, out_serialize, format_settings); } if (cut_to_width && format_settings.pretty.preserve_border_for_multiline_string) - splitValueAtBreakLine(serialized_value, transferred_row[j], cur_width, cut_to_width, prefix); - has_transferred_row |= !transferred_row[j].empty(); + splitValueAtBreakLine(serialized_value, transferred_row[j], cur_width); + has_transferred_row |= !transferred_row[j].empty() && cur_width <= cut_to_width; writeValueWithPadding(serialized_value, cur_width, max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type), !transferred_row[j].empty(), false); - - prefix += max_widths[j] + 3; } writeCString(grid_symbols.bar, out); @@ -502,7 +499,6 @@ void PrettyBlockOutputFormat::writeTransferredRow(const Widths & max_widths, con std::vector<String> new_transferred_row(num_columns); bool has_transferred_row = false; - size_t prefix = format_settings.pretty.output_format_pretty_row_numbers ?
row_number_width + 2 : 2; for (size_t j = 0; j < num_columns; ++j) { @@ -514,13 +510,11 @@ void PrettyBlockOutputFormat::writeTransferredRow(const Widths & max_widths, con const auto & type = *header.getByPosition(j).type; size_t cur_width = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(transferred_row[j].data()), transferred_row[j].size()); if (cut_to_width) - splitValueAtBreakLine(transferred_row[j], new_transferred_row[j], cur_width, cut_to_width, prefix); - has_transferred_row |= !new_transferred_row[j].empty(); + splitValueAtBreakLine(transferred_row[j], new_transferred_row[j], cur_width); + has_transferred_row |= !new_transferred_row[j].empty() && cur_width <= cut_to_width; writeValueWithPadding(transferred_row[j], cur_width, max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type), !new_transferred_row[j].empty(), !transferred_row[j].empty()); - - prefix += max_widths[j] + 3; } if (!space_block) @@ -531,14 +525,13 @@ ... writeTransferredRow(max_widths, header, new_transferred_row, cut_to_width, space_block); } -void PrettyBlockOutputFormat::splitValueAtBreakLine(String & value, String & transferred_value, size_t & value_width, size_t cut_to_width, size_t prefix) +void PrettyBlockOutputFormat::splitValueAtBreakLine(String & value, String & transferred_value, size_t & value_width) { if (size_t break_line_pos = value.find_first_of('\n'); break_line_pos != String::npos) { - value_width = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(value.data()), break_line_pos, prefix); - if (value_width <= cut_to_width) - transferred_value = value.substr(break_line_pos + 1); + transferred_value = value.substr(break_line_pos + 1); value = value.substr(0, break_line_pos); + value_width = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(value.data()), value.size()); } } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index d8309fd2637..6673c61c61b 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -44,7 +44,7 @@ protected: void calculateWidths( const Block & header, const Chunk & chunk, - WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths, size_t table_border_width); + WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths); void writeValueWithPadding( String & value, size_t value_width, size_t pad_to_width, size_t cut_to_width, @@ -52,7 +52,7 @@ protected: void writeTransferredRow(const Widths & max_widths, const Block & header, std::vector<String> & transferred_row, size_t cut_to_width, bool space_block); - void splitValueAtBreakLine(String & value, String & transferred_value, size_t & value_width, size_t cut_to_width, size_t prefix); + void splitValueAtBreakLine(String & value, String & transferred_value, size_t & value_width); void resetFormatterImpl() override { diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index 80c4dd7b24c..ce22a3b2864 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -170,8 +170,6 @@ void PrettyCompactBlockOutputFormat::writeRow( std::vector<String> transferred_row(num_columns); bool has_transferred_row = false; - size_t prefix = format_settings.pretty.output_format_pretty_row_numbers ?
row_number_width + 2 : 2; - for (size_t j = 0; j < num_columns; ++j) { if (j != 0) @@ -185,13 +183,11 @@ void PrettyCompactBlockOutputFormat::writeRow( serializations[j]->serializeText(*columns[j], row_num, out_serialize, format_settings); } if (cut_to_width && format_settings.pretty.preserve_border_for_multiline_string) - splitValueAtBreakLine(serialized_value, transferred_row[j], cur_width, cut_to_width, prefix); - has_transferred_row |= !transferred_row[j].empty(); + splitValueAtBreakLine(serialized_value, transferred_row[j], cur_width); + has_transferred_row |= !transferred_row[j].empty() && cur_width <= cut_to_width; writeValueWithPadding(serialized_value, cur_width, max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type), !transferred_row[j].empty(), false); - - prefix += max_widths[j] + 3; } writeCString(grid_symbols.bar, out); @@ -212,7 +208,7 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po WidthsPerColumn widths; Widths max_widths; Widths name_widths; - calculateWidths(header, chunk, widths, max_widths, name_widths, 2); + calculateWidths(header, chunk, widths, max_widths, name_widths); writeHeader(header, max_widths, name_widths); diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index 3fe22580e18..d311f005173 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -31,7 +31,7 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port WidthsPerColumn widths; Widths max_widths; Widths name_widths; - calculateWidths(header, chunk, widths, max_widths, name_widths, 1); + calculateWidths(header, chunk, widths, max_widths, name_widths); if (format_settings.pretty.output_format_pretty_row_numbers) writeString(String(row_number_width, ' '), out); @@ -88,7 +88,6 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port writeCString("\033[0m", out); } - size_t prefix = format_settings.pretty.output_format_pretty_row_numbers ? row_number_width + 1 : 1; for (size_t column = 0; column < num_columns; ++column) { if (column != 0) @@ -102,13 +101,11 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port serializations[column]->serializeText(*columns[column], row, out_serialize, format_settings); } if (cut_to_width && format_settings.pretty.preserve_border_for_multiline_string) - splitValueAtBreakLine(serialized_value, transferred_row[column], cur_width, cur_width, prefix); - has_transferred_row |= !transferred_row[column].empty(); + splitValueAtBreakLine(serialized_value, transferred_row[column], cur_width); + has_transferred_row |= !transferred_row[column].empty() && cur_width <= cut_to_width; writeValueWithPadding(serialized_value, cur_width, max_widths[column], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type), !transferred_row[column].empty(), false); - - prefix += max_widths[column] + 3; } writeReadableNumberTip(chunk); diff --git a/tests/queries/0_stateless/00298_enum_width_and_cast.reference b/tests/queries/0_stateless/00298_enum_width_and_cast.reference index 227a7be0a19..706e3f2ae98 100644 --- a/tests/queries/0_stateless/00298_enum_width_and_cast.reference +++ b/tests/queries/0_stateless/00298_enum_width_and_cast.reference @@ -5,13 +5,13 @@ 1. │ Hello │ 0 │ 2. │ \ │ 0 │ └───────┴───┘ - ┌─x─────┬─y─┐ -1. │ Hello │ 0 │ -2. │ \ │ 0 │ + ┌─x────────┬─y─┐ +1. 
│ Hello │ 0 │ +2. │ \ │ 0 │ 3. │ \t │ 0 │ - └───────┴───┘ - ┌─x─────┬─y─┬─toInt8(x)─┬─s─────┬─casted─┐ -1. │ Hello │ 0 │ -100 │ Hello │ Hello │ -2. │ \ │ 0 │ 0 │ \ │ \ │ + └──────────┴───┘ + ┌─x────────┬─y─┬─toInt8(x)─┬─s─────┬─casted─┐ +1. │ Hello │ 0 │ -100 │ Hello │ Hello │ +2. │ \ │ 0 │ 0 │ \ │ \ │ 3. │ \t │ 0 │ 111 │ \t │ \t │ - └───────┴───┴───────────┴───────┴────────┘ + └──────────┴───┴───────────┴───────┴────────┘ diff --git a/tests/queries/0_stateless/00730_unicode_terminal_format.reference b/tests/queries/0_stateless/00730_unicode_terminal_format.reference index d64fbe57e9e..0bf728b0743 100644 --- a/tests/queries/0_stateless/00730_unicode_terminal_format.reference +++ b/tests/queries/0_stateless/00730_unicode_terminal_format.reference @@ -63,11 +63,11 @@ ┡━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 13. │ Ahoj │ Tento kód můžete upravit a spustit │ └──────┴────────────────────────────────────┘ - ┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ c1 ┃ c2 ┃ - ┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩ + ┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ c1 ┃ c2 ┃ + ┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩ 14. │ Tabs Tabs │ Non-first Tabs │ - └─────────────────┴───────────────────────┘ + └─────────────┴───────────────────────┘ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ c1 ┃ c2 ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -78,11 +78,11 @@ ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ 16. │ Russian ё and ё │ Zero bytes in middle │ └──────────────────┴────────────────────────┘ - ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ - ┃ 'Tabs \t Tabs' ┃ 'Long\tTitle' ┃ - ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ -1. │ Tabs Tabs │ Long Title │ - └──────────────────┴───────────────┘ + ┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ + ┃ 'Tabs \t Tabs' ┃ 'Long\tTitle' ┃ + ┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ +1. │ Tabs Tabs │ Long Title │ + └────────────────┴───────────────┘ Row 1: ────── '你好': 你好 diff --git a/tests/queries/0_stateless/03148_tabs_in_pretty_format.reference b/tests/queries/0_stateless/03148_tabs_in_pretty_format.reference deleted file mode 100644 index a0f32016d85..00000000000 --- a/tests/queries/0_stateless/03148_tabs_in_pretty_format.reference +++ /dev/null @@ -1,72 +0,0 @@ -┏━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ -┃ id ┃ value ┃ value1 ┃ -┡━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ -│ 0 │ test test │ something │ -└────┴───────────┴────────────────┘ - ┏━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ - ┃ id ┃ value ┃ value1 ┃ - ┡━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -1. │ 0 │ test test │ something │ - └────┴───────────┴─────────────┘ -┌─id─┬─value─┬─value1────┐ -│ 0 │ test …│ something │ -│ │… test │ │ -└────┴───────┴───────────┘ - ┌─id─┬─value──────┬─value1────┐ -1. │ 0 │ test …│ something │ - │ │… test │ │ - └────┴────────────┴───────────┘ - id value value1 - - 0 test … something - … test - id value value1 - -1. 0 test … something - … test -┌─id─┬─value─────┬─value1────┐ -│ 0 │ something │ test …│ -│ │ │… test │ -└────┴───────────┴───────────┘ - ┌─id─┬─value─────┬─value1─┐ -1. │ 0 │ something │ test …│ - │ │ │… test │ - └────┴───────────┴────────┘ - id value value1 - - 0 something test … - … test - id value value1 - -1. 
0 something test … - … test -┏━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ -┃ id ┃ value ┃ value1 ┃ -┡━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ -│ 0 │ something │ test …│ -│ │ │… test │ -├────┼───────────────────┼───────────┤ -│ 1 │ some thing │ test …│ -│ │ │… test │ -└────┴───────────────────┴───────────┘ - ┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ - ┃ id ┃ value ┃ value1 ┃ - ┡━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ -1. │ 0 │ something │ test …│ - │ │ │… test │ - ├────┼────────────────────────┼───────────┤ -2. │ 1 │ some thing │ test …│ - │ │ │… test │ - └────┴────────────────────────┴───────────┘ -┏━━━━┳━━━━━━━┳━━━━━━━━┓ -┃ id ┃ value ┃ value1 ┃ -┡━━━━╇━━━━━━━╇━━━━━━━━┩ -│ 0 │ somet⋯│ test …│ -│ │ │…testt⋯ │ -└────┴───────┴────────┘ - ┏━━━━┳━━━━━━━┳━━━━━━━━┓ - ┃ id ┃ value ┃ value1 ┃ - ┡━━━━╇━━━━━━━╇━━━━━━━━┩ -1. │ 0 │ somet⋯│ test …│ - │ │ │…testt⋯ │ - └────┴───────┴────────┘ diff --git a/tests/queries/0_stateless/03148_tabs_in_pretty_format.sql b/tests/queries/0_stateless/03148_tabs_in_pretty_format.sql deleted file mode 100644 index 161cf6304f6..00000000000 --- a/tests/queries/0_stateless/03148_tabs_in_pretty_format.sql +++ /dev/null @@ -1,42 +0,0 @@ -DROP TABLE IF EXISTS t_tabs; - -CREATE TABLE t_tabs (id UInt64, value String, value1 String) ENGINE=MergeTree ORDER BY id; - -INSERT INTO t_tabs VALUES(0, 'test test', '\tsomething'); - -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyMonoBlock SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyMonoBlock; - -TRUNCATE TABLE t_tabs; - -INSERT INTO t_tabs VALUES(0, 'test\n\ttest', 'something'); - -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyCompactNoEscapes SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyCompactNoEscapes; -SELECT * FROM t_tabs FORMAT PrettySpace SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_tabs FORMAT PrettySpace; - -TRUNCATE TABLE t_tabs; - -INSERT INTO t_tabs VALUES(0, 'something', 'test\n\ttest'); - -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyCompactNoEscapes SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyCompactNoEscapes; -SELECT * FROM t_tabs FORMAT PrettySpace SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_tabs FORMAT PrettySpace; - -INSERT INTO t_tabs VALUES(1, '\tsome\tthing\t', 'test\n\ttest'); - -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyMonoBlock SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyMonoBlock; - -TRUNCATE TABLE t_tabs; - -SET output_format_pretty_max_value_width = 5; - -INSERT INTO t_tabs VALUES(0, 'someth\ning\t', 'test\ntesttest'); - -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyMonoBlock SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_tabs ORDER BY id FORMAT PrettyMonoBlock; - -DROP TABLE t_tabs; From 7e429482fc37e91dece953eb3c5570556308e9d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 16 May 2024 12:55:26 +0200 Subject: [PATCH 283/651] Revert "Merge pull request #63479 from yariks5s/add_setting_from_multiline_strings" This reverts commit 962d5e5bda7c484e35ac710beeaa1fea05cd99a0, reversing changes made to 8c4a5d36635bef04afc8856c777de65ecc1e6789. 
--- src/Core/Settings.h | 1 - src/Core/SettingsChangesHistory.h | 1 - src/Formats/FormatFactory.cpp | 1 - src/Formats/FormatSettings.h | 1 - .../Formats/Impl/PrettyBlockOutputFormat.cpp | 4 ++-- .../Impl/PrettyCompactBlockOutputFormat.cpp | 2 +- .../Impl/PrettySpaceBlockOutputFormat.cpp | 2 +- .../03132_pretty_format_break_line.reference | 18 ------------------ .../03132_pretty_format_break_line.sql | 7 ------- 9 files changed, 4 insertions(+), 33 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 05a49029ff8..90895fce508 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1114,7 +1114,6 @@ class IColumn; \ M(String, format_json_object_each_row_column_for_object_name, "", "The name of column that will be used as object names in JSONObjectEachRow format. Column type should be String", 0) \ \ - M(Bool, output_format_pretty_preserve_border_for_multiline_string, true, "Applies better rendering for multiline strings.", 0) \ M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \ M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index ece48620618..5ea99aa0192 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -92,7 +92,6 @@ static std::map sett {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 43ccee173f0..3199445864d 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -174,7 +174,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.pretty.max_value_width_apply_for_single_value = settings.output_format_pretty_max_value_width_apply_for_single_value; format_settings.pretty.highlight_digit_groups = settings.output_format_pretty_highlight_digit_groups; format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers; - format_settings.pretty.preserve_border_for_multiline_string = settings.output_format_pretty_preserve_border_for_multiline_string; format_settings.pretty.output_format_pretty_single_large_number_tip_threshold = settings.output_format_pretty_single_large_number_tip_threshold; format_settings.protobuf.input_flatten_google_wrappers = settings.input_format_protobuf_flatten_google_wrappers; format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 2964158c9d8..f29fc51af6a 100644 --- 
a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -283,7 +283,6 @@ struct FormatSettings SettingFieldUInt64Auto color{"auto"}; bool output_format_pretty_row_numbers = false; - bool preserve_border_for_multiline_string = true; UInt64 output_format_pretty_single_large_number_tip_threshold = 1'000'000; enum class Charset : uint8_t diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 41c7bfa316b..178d0b912e1 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -38,7 +38,7 @@ void PrettyBlockOutputFormat::calculateWidths( max_padded_widths.resize_fill(num_columns); name_widths.resize(num_columns); - const bool need_cut_to_width = format_settings.pretty.preserve_border_for_multiline_string && (format_settings.pretty.max_value_width_apply_for_single_value || num_rows != 1 || num_columns != 1 || total_rows != 0); + const bool need_cut_to_width = format_settings.pretty.max_value_width_apply_for_single_value || num_rows != 1 || num_columns != 1 || total_rows != 0; /// Calculate widths of all values. String serialized_value; @@ -333,7 +333,7 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind WriteBufferFromString out_serialize(serialized_value, AppendModeTag()); serializations[j]->serializeText(*columns[j], i, out_serialize, format_settings); } - if (cut_to_width && format_settings.pretty.preserve_border_for_multiline_string) + if (cut_to_width) splitValueAtBreakLine(serialized_value, transferred_row[j], cur_width); has_transferred_row |= !transferred_row[j].empty() && cur_width <= cut_to_width; diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index ce22a3b2864..345b6c84ecd 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -182,7 +182,7 @@ void PrettyCompactBlockOutputFormat::writeRow( WriteBufferFromString out_serialize(serialized_value, AppendModeTag()); serializations[j]->serializeText(*columns[j], row_num, out_serialize, format_settings); } - if (cut_to_width && format_settings.pretty.preserve_border_for_multiline_string) + if (cut_to_width) splitValueAtBreakLine(serialized_value, transferred_row[j], cur_width); has_transferred_row |= !transferred_row[j].empty() && cur_width <= cut_to_width; diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index d311f005173..6940c20e25b 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -100,7 +100,7 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port WriteBufferFromString out_serialize(serialized_value, AppendModeTag()); serializations[column]->serializeText(*columns[column], row, out_serialize, format_settings); } - if (cut_to_width && format_settings.pretty.preserve_border_for_multiline_string) + if (cut_to_width) splitValueAtBreakLine(serialized_value, transferred_row[column], cur_width); has_transferred_row |= !transferred_row[column].empty() && cur_width <= cut_to_width; diff --git a/tests/queries/0_stateless/03132_pretty_format_break_line.reference b/tests/queries/0_stateless/03132_pretty_format_break_line.reference index 06b17ce4e12..a5282f89327 
100644 --- a/tests/queries/0_stateless/03132_pretty_format_break_line.reference +++ b/tests/queries/0_stateless/03132_pretty_format_break_line.reference @@ -86,21 +86,3 @@ 2. │ 1 │ hello world │ hellow …│ │ │ │…мир │ └────┴─────────────┴─────────────┘ -┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ id ┃ value ┃ value1 ┃ -┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ 0 │ привет -world │ hello world │ -├────┼─────────────┼─────────────┤ -│ 1 │ hello world │ hellow -мир │ -└────┴─────────────┴─────────────┘ - ┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ - ┃ id ┃ value ┃ value1 ┃ - ┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -1. │ 0 │ привет -world │ hello world │ - ├────┼─────────────┼─────────────┤ -2. │ 1 │ hello world │ hellow -мир │ - └────┴─────────────┴─────────────┘ diff --git a/tests/queries/0_stateless/03132_pretty_format_break_line.sql b/tests/queries/0_stateless/03132_pretty_format_break_line.sql index 5f002d8df67..ecf967c1836 100644 --- a/tests/queries/0_stateless/03132_pretty_format_break_line.sql +++ b/tests/queries/0_stateless/03132_pretty_format_break_line.sql @@ -1,7 +1,5 @@ DROP TABLE IF EXISTS t_break_line; -SET output_format_pretty_preserve_border_for_multiline_string=1; - CREATE TABLE t_break_line (id UInt64, value String, value1 String) ENGINE=MergeTree ORDER BY id; INSERT INTO t_break_line VALUES(0, 'hello\nworld', 'hello world'); @@ -32,9 +30,4 @@ INSERT INTO t_break_line VALUES(1, 'hello world', 'hellow\nмир'); SELECT * FROM t_break_line ORDER BY id FORMAT PrettyMonoBlock SETTINGS output_format_pretty_row_numbers = 0; SELECT * FROM t_break_line ORDER BY id FORMAT PrettyMonoBlock; -SET output_format_pretty_preserve_border_for_multiline_string=0; - -SELECT * FROM t_break_line ORDER BY id FORMAT PrettyMonoBlock SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_break_line ORDER BY id FORMAT PrettyMonoBlock; - DROP TABLE t_break_line; \ No newline at end of file From 7bcef97fa1db3cf9be945d25c60c08e47a7b189a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 16 May 2024 12:55:44 +0200 Subject: [PATCH 284/651] Revert "Merge pull request #59940 from Volodyachan/multiline-strings-in-Pretty-formats" This reverts commit e0c23fb07de45657db69e54f41d3d556bf8f3551, reversing changes made to 02a5b010af5823ff72ab2b8fa09bf9896fc765c6. --- .../Formats/Impl/PrettyBlockOutputFormat.cpp | 129 +++--------------- .../Formats/Impl/PrettyBlockOutputFormat.h | 8 +- .../Impl/PrettyCompactBlockOutputFormat.cpp | 19 +-- .../Impl/PrettySpaceBlockOutputFormat.cpp | 21 +-- ...2026_describe_include_subcolumns.reference | 66 ++++----- .../03132_pretty_format_break_line.reference | 88 ------------ .../03132_pretty_format_break_line.sql | 33 ----- 7 files changed, 61 insertions(+), 303 deletions(-) delete mode 100644 tests/queries/0_stateless/03132_pretty_format_break_line.reference delete mode 100644 tests/queries/0_stateless/03132_pretty_format_break_line.sql diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 178d0b912e1..086b5bfada2 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -38,8 +38,6 @@ void PrettyBlockOutputFormat::calculateWidths( max_padded_widths.resize_fill(num_columns); name_widths.resize(num_columns); - const bool need_cut_to_width = format_settings.pretty.max_value_width_apply_for_single_value || num_rows != 1 || num_columns != 1 || total_rows != 0; - /// Calculate widths of all values. 
String serialized_value; size_t prefix = 2; // Tab character adjustment @@ -70,20 +68,6 @@ void PrettyBlockOutputFormat::calculateWidths( } widths[i][j] = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), prefix); - if (need_cut_to_width && serialized_value.contains('\n')) - { - size_t row_width = 0; - size_t row_start = 0; - for (size_t k = 0; k < serialized_value.size(); ++k) - { - if (serialized_value[k] == '\n') - { - row_width = std::max(row_width, UTF8::computeWidth(reinterpret_cast<const UInt8 *>(serialized_value.data() + row_start), k - row_start, prefix)); - row_start = k + 1; - } - } - widths[i][j] = std::max(row_width, UTF8::computeWidth(reinterpret_cast<const UInt8 *>(serialized_value.data() + row_start), serialized_value.size() - row_start, prefix)); - } max_padded_widths[i] = std::max(max_padded_widths[i], std::min(format_settings.pretty.max_column_pad_width, std::min(format_settings.pretty.max_value_width, widths[i][j]))); @@ -319,34 +303,19 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind writeCString(grid_symbols.bar, out); - std::vector<String> transferred_row(num_columns); - bool has_transferred_row = false; - for (size_t j = 0; j < num_columns; ++j) { if (j != 0) writeCString(grid_symbols.bar, out); const auto & type = *header.getByPosition(j).type; - size_t cur_width = widths[j].empty() ? max_widths[j] : widths[j][i]; - String serialized_value; - { - WriteBufferFromString out_serialize(serialized_value, AppendModeTag()); - serializations[j]->serializeText(*columns[j], i, out_serialize, format_settings); - } - if (cut_to_width) - splitValueAtBreakLine(serialized_value, transferred_row[j], cur_width); - has_transferred_row |= !transferred_row[j].empty() && cur_width <= cut_to_width; - - writeValueWithPadding(serialized_value, cur_width, max_widths[j], cut_to_width, - type.shouldAlignRightInPrettyFormats(), isNumber(type), !transferred_row[j].empty(), false); + writeValueWithPadding(*columns[j], *serializations[j], i, + widths[j].empty() ?
max_widths[j] : widths[j][i], + max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type)); } writeCString(grid_symbols.bar, out); writeReadableNumberTip(chunk); writeCString("\n", out); - - if (has_transferred_row) - writeTransferredRow(max_widths, header, transferred_row, cut_to_width, false); } if (format_settings.pretty.output_format_pretty_row_numbers) @@ -427,34 +396,34 @@ static String highlightDigitGroups(String source) void PrettyBlockOutputFormat::writeValueWithPadding( - String & value, size_t value_width, size_t pad_to_width, size_t cut_to_width, - bool align_right, bool is_number, bool has_break_line, bool is_transferred_value) + const IColumn & column, const ISerialization & serialization, size_t row_num, + size_t value_width, size_t pad_to_width, size_t cut_to_width, bool align_right, bool is_number) { - if (is_transferred_value) - writeString("…", out); - else - writeChar(' ', out); + String serialized_value = " "; + { + WriteBufferFromString out_serialize(serialized_value, AppendModeTag()); + serialization.serializeText(column, row_num, out_serialize, format_settings); + } if (cut_to_width && value_width > cut_to_width) { - value.resize(UTF8::computeBytesBeforeWidth( - reinterpret_cast<const UInt8 *>(value.data()), value.size(), 0, format_settings.pretty.max_value_width)); + serialized_value.resize(UTF8::computeBytesBeforeWidth( + reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), 0, 1 + format_settings.pretty.max_value_width)); const char * ellipsis = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ? "⋯" : "~"; if (color) { - value += "\033[31;1m"; - value += ellipsis; - value += "\033[0m"; + serialized_value += "\033[31;1m"; + serialized_value += ellipsis; + serialized_value += "\033[0m"; } else - value += ellipsis; + serialized_value += ellipsis; value_width = format_settings.pretty.max_value_width; - has_break_line = false; } - else if (!has_break_line) - value += ' '; + else + serialized_value += ' '; auto write_padding = [&]() { @@ -465,74 +434,18 @@ void PrettyBlockOutputFormat::writeValueWithPadding( /// Highlight groups of thousands. if (color && is_number && format_settings.pretty.highlight_digit_groups) - value = highlightDigitGroups(value); + serialized_value = highlightDigitGroups(serialized_value); if (align_right) { write_padding(); - out.write(value.data(), value.size()); + out.write(serialized_value.data(), serialized_value.size()); } else { - out.write(value.data(), value.size()); + out.write(serialized_value.data(), serialized_value.size()); write_padding(); } - - if (has_break_line) - writeString("…", out); -} - -void PrettyBlockOutputFormat::writeTransferredRow(const Widths & max_widths, const Block & header, std::vector<String> & transferred_row, size_t cut_to_width, bool space_block) -{ - const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
- utf8_grid_symbols : - ascii_grid_symbols; - - size_t num_columns = max_widths.size(); - - if (format_settings.pretty.output_format_pretty_row_numbers) - for (size_t i = 0; i < row_number_width; ++i) - writeChar(' ', out); - - if (!space_block) - writeCString(grid_symbols.bar, out); - - std::vector<String> new_transferred_row(num_columns); - bool has_transferred_row = false; - - for (size_t j = 0; j < num_columns; ++j) - { - if (j != 0 && !space_block) - writeCString(grid_symbols.bar, out); - else if (j != 0) - writeCString(" ", out); - - const auto & type = *header.getByPosition(j).type; - size_t cur_width = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(transferred_row[j].data()), transferred_row[j].size()); - if (cut_to_width) - splitValueAtBreakLine(transferred_row[j], new_transferred_row[j], cur_width); - has_transferred_row |= !new_transferred_row[j].empty() && cur_width <= cut_to_width; - - writeValueWithPadding(transferred_row[j], cur_width, max_widths[j], cut_to_width, - type.shouldAlignRightInPrettyFormats(), isNumber(type), !new_transferred_row[j].empty(), !transferred_row[j].empty()); - } - - if (!space_block) - writeCString(grid_symbols.bar, out); - writeCString("\n", out); - - if (has_transferred_row) - writeTransferredRow(max_widths, header, new_transferred_row, cut_to_width, space_block); -} - -void PrettyBlockOutputFormat::splitValueAtBreakLine(String & value, String & transferred_value, size_t & value_width) -{ - if (size_t break_line_pos = value.find_first_of('\n'); break_line_pos != String::npos) - { - transferred_value = value.substr(break_line_pos + 1); - value = value.substr(0, break_line_pos); - value_width = UTF8::computeWidth(reinterpret_cast<const UInt8 *>(value.data()), value.size()); - } } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index 6673c61c61b..4c52300fbd1 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -47,12 +47,8 @@ protected: WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths); void writeValueWithPadding( - String & value, size_t value_width, size_t pad_to_width, size_t cut_to_width, - bool align_right, bool is_number, bool has_break_line, bool is_transferred_value); - - void writeTransferredRow(const Widths & max_widths, const Block & header, std::vector<String> & transferred_row, size_t cut_to_width, bool space_block); - - void splitValueAtBreakLine(String & value, String & transferred_value, size_t & value_width); + const IColumn & column, const ISerialization & serialization, size_t row_num, + size_t value_width, size_t pad_to_width, size_t cut_to_width, bool align_right, bool is_number); void resetFormatterImpl() override { diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index ce22a3b2864..e1cbf69dbf0 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -168,34 +168,19 @@ void PrettyCompactBlockOutputFormat::writeRow( writeCString(grid_symbols.bar, out); - std::vector<String> transferred_row(num_columns); - bool has_transferred_row = false; for (size_t j = 0; j < num_columns; ++j) { if (j != 0) writeCString(grid_symbols.bar, out); const auto & type = *header.getByPosition(j).type; - size_t cur_width = widths[j].empty() ?
max_widths[j] : widths[j][row_num]; - String serialized_value; - { - WriteBufferFromString out_serialize(serialized_value, AppendModeTag()); - serializations[j]->serializeText(*columns[j], row_num, out_serialize, format_settings); - } - if (cut_to_width) - splitValueAtBreakLine(serialized_value, transferred_row[j], cur_width, cut_to_width, prefix); - has_transferred_row |= !transferred_row[j].empty(); - - writeValueWithPadding(serialized_value, cur_width, max_widths[j], cut_to_width, - type.shouldAlignRightInPrettyFormats(), isNumber(type), !transferred_row[j].empty(), false); - - prefix += max_widths[j] + 3; + const auto & cur_widths = widths[j].empty() ? max_widths[j] : widths[j][row_num]; + writeValueWithPadding(*columns[j], *serializations[j], row_num, cur_widths, max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type)); } writeCString(grid_symbols.bar, out); writeReadableNumberTip(chunk); writeCString("\n", out); - - if (has_transferred_row) - writeTransferredRow(max_widths, header, transferred_row, cut_to_width, false); } void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind) diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index 6940c20e25b..3f224f034aa 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -70,9 +70,6 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port } writeCString("\n\n", out); - std::vector<String> transferred_row(num_columns); - bool has_transferred_row = false; - for (size_t row = 0; row < num_rows && total_rows + row < max_rows; ++row) { if (format_settings.pretty.output_format_pretty_row_numbers) @@ -94,25 +91,13 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port writeCString(" ", out); const auto & type = *header.getByPosition(column).type; - size_t cur_width = widths[column].empty() ?
max_widths[column] : widths[column][row]; + writeValueWithPadding( + *columns[column], *serializations[column], row, cur_width, max_widths[column], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type)); } writeReadableNumberTip(chunk); writeChar('\n', out); - - if (has_transferred_row) - writeTransferredRow(max_widths, header, transferred_row, cut_to_width, true); } total_rows += num_rows; diff --git a/tests/queries/0_stateless/02026_describe_include_subcolumns.reference b/tests/queries/0_stateless/02026_describe_include_subcolumns.reference index 082aa29de83..dec65f62748 100644 --- a/tests/queries/0_stateless/02026_describe_include_subcolumns.reference +++ b/tests/queries/0_stateless/02026_describe_include_subcolumns.reference @@ -1,33 +1,33 @@ - ┌─name─┬─type─────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┐ -1. │ d │ Date │ │ │ │ │ │ -2. │ n │ Nullable(String) │ │ │ It is a nullable column │ │ │ -3. │ arr1 │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ -4. │ arr2 │ Array(Array(String)) │ │ │ │ │ d + toIntervalDay(1) │ -5. │ t │ Tuple( …│ │ │ │ ZSTD(1) │ │ - │ │… s String, …│ │ │ │ │ │ - │ │… a Array(Tuple( …│ │ │ │ │ │ - │ │… a UInt32, …│ │ │ │ │ │ - │ │… b UInt32))) │ │ │ │ │ │ - └──────┴──────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┘ - ┌─name───────┬─type─────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┬─is_subcolumn─┐ - 1. │ d │ Date │ │ │ │ │ │ 0 │ - 2. │ n │ Nullable(String) │ │ │ It is a nullable column │ │ │ 0 │ - 3. │ arr1 │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 0 │ - 4. │ arr2 │ Array(Array(String)) │ │ │ │ │ d + toIntervalDay(1) │ 0 │ - 5. │ t │ Tuple( …│ │ │ │ ZSTD(1) │ │ 0 │ - │ │… s String, …│ │ │ │ │ │ │ - │ │… a Array(Tuple( …│ │ │ │ │ │ │ - │ │… a UInt32, …│ │ │ │ │ │ │ - │ │… b UInt32))) │ │ │ │ │ │ │ - 6. │ n.null │ UInt8 │ │ │ It is a nullable column │ │ │ 1 │ - 7. │ arr1.size0 │ UInt64 │ │ │ │ │ │ 1 │ - 8. │ arr2.size0 │ UInt64 │ │ │ │ │ d + toIntervalDay(1) │ 1 │ - 9. │ arr2.size1 │ Array(UInt64) │ │ │ │ │ d + toIntervalDay(1) │ 1 │ -10. │ t.s │ String │ │ │ │ ZSTD(1) │ │ 1 │ -11. │ t.a │ Array(Tuple( …│ │ │ │ │ │ 1 │ - │ │… a UInt32, …│ │ │ │ │ │ │ - │ │… b UInt32)) │ │ │ │ │ │ │ -12. │ t.a.size0 │ UInt64 │ │ │ │ │ │ 1 │ -13. │ t.a.a │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │ -14. │ t.a.b │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │ - └────────────┴──────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┴──────────────┘ + ┌─name─┬─type──────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┐ +1. │ d │ Date │ │ │ │ │ │ +2. │ n │ Nullable(String) │ │ │ It is a nullable column │ │ │ +3. │ arr1 │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ +4. │ arr2 │ Array(Array(String)) │ │ │ │ │ d + toIntervalDay(1) │ +5. 
│ t │ Tuple( + s String, + a Array(Tuple( + a UInt32, + b UInt32))) │ │ │ │ ZSTD(1) │ │ + └──────┴───────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┘ + ┌─name───────┬─type──────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┬─is_subcolumn─┐ + 1. │ d │ Date │ │ │ │ │ │ 0 │ + 2. │ n │ Nullable(String) │ │ │ It is a nullable column │ │ │ 0 │ + 3. │ arr1 │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 0 │ + 4. │ arr2 │ Array(Array(String)) │ │ │ │ │ d + toIntervalDay(1) │ 0 │ + 5. │ t │ Tuple( + s String, + a Array(Tuple( + a UInt32, + b UInt32))) │ │ │ │ ZSTD(1) │ │ 0 │ + 6. │ n.null │ UInt8 │ │ │ It is a nullable column │ │ │ 1 │ + 7. │ arr1.size0 │ UInt64 │ │ │ │ │ │ 1 │ + 8. │ arr2.size0 │ UInt64 │ │ │ │ │ d + toIntervalDay(1) │ 1 │ + 9. │ arr2.size1 │ Array(UInt64) │ │ │ │ │ d + toIntervalDay(1) │ 1 │ +10. │ t.s │ String │ │ │ │ ZSTD(1) │ │ 1 │ +11. │ t.a │ Array(Tuple( + a UInt32, + b UInt32)) │ │ │ │ │ │ 1 │ +12. │ t.a.size0 │ UInt64 │ │ │ │ │ │ 1 │ +13. │ t.a.a │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │ +14. │ t.a.b │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │ + └────────────┴───────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┴──────────────┘ diff --git a/tests/queries/0_stateless/03132_pretty_format_break_line.reference b/tests/queries/0_stateless/03132_pretty_format_break_line.reference deleted file mode 100644 index a5282f89327..00000000000 --- a/tests/queries/0_stateless/03132_pretty_format_break_line.reference +++ /dev/null @@ -1,88 +0,0 @@ -┌─id─┬─value─┬─value1──────┐ -│ 0 │ hello…│ hello world │ -│ │…world │ │ -└────┴───────┴─────────────┘ - ┌─id─┬─value─┬─value1──────┐ -1. │ 0 │ hello…│ hello world │ - │ │…world │ │ - └────┴───────┴─────────────┘ - ┌─id─┬─value─┬─value1──────┐ -1. │ 0 │ hello…│ hello world │ - │ │…world │ │ - └────┴───────┴─────────────┘ - ┏━━━━┳━━━━━━━┳━━━━━━━━━━━━━┓ - ┃ id ┃ value ┃ value1 ┃ - ┡━━━━╇━━━━━━━╇━━━━━━━━━━━━━┩ -1. │ 0 │ hello…│ hello world │ - │ │…world │ │ - └────┴───────┴─────────────┘ - id value value1 - - 0 hello… hello world - …world - id value value1 - -1. 0 hello… hello world - …world -┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ id ┃ value ┃ value1 ┃ -┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ 0 │ hello …│ hello world │ -│ │…world │ │ -├────┼─────────────┼─────────────┤ -│ 1 │ hello world │ hello …│ -│ │ │…world │ -└────┴─────────────┴─────────────┘ - ┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ - ┃ id ┃ value ┃ value1 ┃ - ┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -1. │ 0 │ hello …│ hello world │ - │ │…world │ │ - ├────┼─────────────┼─────────────┤ -2. │ 1 │ hello world │ hello …│ - │ │ │…world │ - └────┴─────────────┴─────────────┘ -┌─id─┬─value──┬─value1──────┐ -│ 0 │ привет…│ hello world │ -│ │…world │ │ -└────┴────────┴─────────────┘ - ┌─id─┬─value──┬─value1──────┐ -1. │ 0 │ привет…│ hello world │ - │ │…world │ │ - └────┴────────┴─────────────┘ - ┌─id─┬─value──┬─value1──────┐ -1. │ 0 │ привет…│ hello world │ - │ │…world │ │ - └────┴────────┴─────────────┘ - ┏━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┓ - ┃ id ┃ value ┃ value1 ┃ - ┡━━━━╇━━━━━━━━╇━━━━━━━━━━━━━┩ -1. │ 0 │ привет…│ hello world │ - │ │…world │ │ - └────┴────────┴─────────────┘ - id value value1 - - 0 привет… hello world - …world - id value value1 - -1. 
0 привет… hello world - …world -┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ id ┃ value ┃ value1 ┃ -┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ 0 │ привет …│ hello world │ -│ │…world │ │ -├────┼─────────────┼─────────────┤ -│ 1 │ hello world │ hellow …│ -│ │ │…мир │ -└────┴─────────────┴─────────────┘ - ┏━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ - ┃ id ┃ value ┃ value1 ┃ - ┡━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -1. │ 0 │ привет …│ hello world │ - │ │…world │ │ - ├────┼─────────────┼─────────────┤ -2. │ 1 │ hello world │ hellow …│ - │ │ │…мир │ - └────┴─────────────┴─────────────┘ diff --git a/tests/queries/0_stateless/03132_pretty_format_break_line.sql b/tests/queries/0_stateless/03132_pretty_format_break_line.sql deleted file mode 100644 index ecf967c1836..00000000000 --- a/tests/queries/0_stateless/03132_pretty_format_break_line.sql +++ /dev/null @@ -1,33 +0,0 @@ -DROP TABLE IF EXISTS t_break_line; - -CREATE TABLE t_break_line (id UInt64, value String, value1 String) ENGINE=MergeTree ORDER BY id; - -INSERT INTO t_break_line VALUES(0, 'hello\nworld', 'hello world'); - -SELECT * FROM t_break_line FORMAT PrettyCompactNoEscapes SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_break_line FORMAT PrettyCompactNoEscapes; -SELECT * FROM t_break_line FORMAT PrettyCompact; -SELECT * FROM t_break_line FORMAT Pretty; -SELECT * FROM t_break_line FORMAT PrettySpace SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_break_line FORMAT PrettySpace; - -INSERT INTO t_break_line VALUES(1, 'hello world', 'hello\nworld'); -SELECT * FROM t_break_line ORDER BY id FORMAT PrettyMonoBlock SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_break_line ORDER BY id FORMAT PrettyMonoBlock; - -TRUNCATE TABLE t_break_line; - -INSERT INTO t_break_line VALUES(0, 'привет\nworld', 'hello world'); - -SELECT * FROM t_break_line FORMAT PrettyCompactNoEscapes SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_break_line FORMAT PrettyCompactNoEscapes; -SELECT * FROM t_break_line FORMAT PrettyCompact; -SELECT * FROM t_break_line FORMAT Pretty; -SELECT * FROM t_break_line FORMAT PrettySpace SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_break_line FORMAT PrettySpace; - -INSERT INTO t_break_line VALUES(1, 'hello world', 'hellow\nмир'); -SELECT * FROM t_break_line ORDER BY id FORMAT PrettyMonoBlock SETTINGS output_format_pretty_row_numbers = 0; -SELECT * FROM t_break_line ORDER BY id FORMAT PrettyMonoBlock; - -DROP TABLE t_break_line; \ No newline at end of file From 5debc4af38255b64fa4b8eb2af05743a4793c7bc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 16 May 2024 11:07:13 +0000 Subject: [PATCH 285/651] Fixing style. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 5307589b6db..ef3224c45d2 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1069,7 +1069,7 @@ private: auto [_, inserted] = scope.alias_name_to_expression_node->insert(std::make_pair(alias, node)); if (!inserted) addDuplicatingAlias(node); - + /// If node is identifier put it also in scope alias name to lambda node map if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER) scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); From e8c2b68333167ff651ee491e51c24f91bb7204a4 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 16 May 2024 13:06:13 +0200 Subject: [PATCH 286/651] Fix bug when `Required check [1/3]` is not included in Mergeable Check --- tests/ci/ci_config.py | 12 ++++++++++++ tests/ci/commit_status_helper.py | 8 +++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 588f4934125..f2625cf4ae3 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import logging +import re from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from copy import deepcopy from dataclasses import dataclass, field @@ -1386,6 +1387,17 @@ REQUIRED_CHECKS = [ JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, ] +BATCH_REGEXP = re.compile(r"\s+\[[0-9/]+\]$") + + +def is_required(check_name: str) -> bool: + """Checks if a check_name is in REQUIRED_CHECKS, including batched jobs""" + if check_name in REQUIRED_CHECKS: + return True + if batch := BATCH_REGEXP.search(check_name): + return check_name[: batch.start()] in REQUIRED_CHECKS + return False + @dataclass class CheckDescription: diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 0b51d98b479..fc939a08e11 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -17,7 +17,7 @@ from github.GithubObject import NotSet from github.IssueComment import IssueComment from github.Repository import Repository -from ci_config import CHECK_DESCRIPTIONS, REQUIRED_CHECKS, CheckDescription, StatusNames +from ci_config import CHECK_DESCRIPTIONS, CheckDescription, StatusNames, is_required from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, @@ -453,7 +453,7 @@ def update_mergeable_check( "check if the check_name in REQUIRED_CHECKS and then trigger update" not_run = ( pr_info.labels.intersection({Labels.SKIP_MERGEABLE_CHECK, Labels.RELEASE}) - or check_name not in REQUIRED_CHECKS + or not is_required(check_name) or pr_info.release_pr or pr_info.number == 0 ) @@ -472,9 +472,7 @@ def trigger_mergeable_check( commit: Commit, statuses: CommitStatuses, hide_url: bool = False ) -> CommitStatus: """calculate and update StatusNames.MERGEABLE""" - required_checks = [ - status for status in statuses if status.context in REQUIRED_CHECKS - ] + required_checks = [status for status in statuses if is_required(status.context)] mergeable_status = None for status in statuses: From 793a11fd19784a446f7c66672803fb3b2db0582a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 15 May 2024 13:41:47 +0200 Subject: [PATCH 287/651] libunwind: remove useless _DEBUG flag Signed-off-by: Azat Khuzhin --- contrib/libunwind-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 8f3cd8bd07b..53d36498985 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -31,7 +31,7 @@ add_library(unwind ${LIBUNWIND_SOURCES}) set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) -target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) +target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_LIBUNWIND_IS_NATIVE_ONLY) # We should enable optimizations (otherwise it will be too slow in debug) # and disable sanitizers (otherwise infinite loop may happen) From 86cf1e13d866333b8a511badd7f2fe186d810646 Mon Sep 17 00:00:00 
2001 From: Azat Khuzhin Date: Wed, 15 May 2024 13:43:54 +0200 Subject: [PATCH 288/651] libunwind: fix usage of libunwind.h (by defining -D_LIBUNWIND_IS_NATIVE_ONLY) sizeof(unw_context_t)/sizeof(unw_cursor_t) depends on this macro (via _LIBUNWIND_CONTEXT_SIZE/_LIBUNWIND_CURSOR_SIZE), so it should be defined not only for PRIVATE but for INTERFACE as well. Signed-off-by: Azat Khuzhin --- contrib/libunwind-cmake/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 53d36498985..37a2f29afcf 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -31,7 +31,9 @@ add_library(unwind ${LIBUNWIND_SOURCES}) set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) -target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_LIBUNWIND_IS_NATIVE_ONLY) +target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1) +# NOTE: sizeof(unw_context_t)/sizeof(unw_cursor_t) depends on this macro, so it should always be set +target_compile_definitions(unwind PUBLIC -D_LIBUNWIND_IS_NATIVE_ONLY) # We should enable optimizations (otherwise it will be too slow in debug) # and disable sanitizers (otherwise infinite loop may happen) From 42246791f049bce0cb6c9681ebb8d74c469591c4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Apr 2024 20:00:56 +0200 Subject: [PATCH 289/651] utils/c++expr: allow changing the build directory Signed-off-by: Azat Khuzhin --- utils/c++expr | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/utils/c++expr b/utils/c++expr index c70a4c7d382..059248918a9 100755 --- a/utils/c++expr +++ b/utils/c++expr @@ -12,6 +12,7 @@ OPTIONS: -l LIB link against LIB (only for -I or -C) -b STEPS_NUM make program to benchmark specified code snippet and run tests with STEPS_NUM each -b perf-top run infinite benchmark and show perf top + -B build-dir build directory for -I (default: "build") -t TESTS_NUM make program to benchmark specified code snippet and run TESTS_NUM tests -o FILE do not run, just save binary executable file -O CXX_OPTS forward option compiler (e.g. 
-O "-O3 -std=c++20") @@ -37,6 +38,7 @@ GLOBAL= OUTPUT_EXECUTABLE= INCS="vector iostream typeinfo cstdlib cmath sys/time.h" LIBS="" +BUILD_DIR=build BENCHMARK_STEPS=0 RUN_PERFTOP= BENCHMARK_TESTS=5 @@ -51,7 +53,7 @@ CMD_PARAMS= # if [ "$1" == "--help" ] || [ -z "$1" ]; then usage; fi -while getopts "vc:CIi:l:b:t:o:O:g:" OPT; do +while getopts "vc:CIi:l:bB:t:o:O:g:" OPT; do case "$OPT" in v) set -x; ;; c) CXX="$OPTARG"; ;; @@ -60,6 +62,7 @@ while getopts "vc:CIi:l:b:t:o:O:g:" OPT; do i) INCS="$INCS $OPTARG"; ;; l) LIBS="$LIBS $OPTARG"; ;; b) if [ "$OPTARG" = perf-top ]; then BENCHMARK_STEPS=-1; RUN_PERFTOP=y; else BENCHMARK_STEPS="$OPTARG"; fi; ;; + B) BUILD_DIR="$OPTARG"; ;; t) BENCHMARK_TESTS="$OPTARG"; ;; o) OUTPUT_EXECUTABLE="$OPTARG"; ;; O) CXX_OPTS="$CXX_OPTS $OPTARG"; ;; @@ -110,11 +113,11 @@ find_clickhouse_root () { find_clickhouse_build () { local CLICKHOUSE_ROOT="`find_clickhouse_root`" - if [ -e "$CLICKHOUSE_ROOT/build/CMakeCache.txt" ]; then - echo "$CLICKHOUSE_ROOT/build" + if [ -e "$CLICKHOUSE_ROOT/$BUILD_DIR/CMakeCache.txt" ]; then + echo "$CLICKHOUSE_ROOT/$BUILD_DIR" return 0 fi - echo "error: $CLICKHOUSE_ROOT/build/CMakeCache.txt doesn't exist" + echo "error: $CLICKHOUSE_ROOT/$BUILD_DIR/CMakeCache.txt doesn't exist" return 1 } From ae2f71f289c82dcd64266b8e4e45920cfc0330ad Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 27 Apr 2024 20:03:46 +0200 Subject: [PATCH 290/651] utils/c++expr: add ability to preserve generated worktree and binary Signed-off-by: Azat Khuzhin --- utils/c++expr | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/utils/c++expr b/utils/c++expr index 059248918a9..8cf5d3a3b16 100755 --- a/utils/c++expr +++ b/utils/c++expr @@ -7,6 +7,7 @@ USAGE: c++expr [-c CXX | -C | -I] [-i INCLUDE] [-l LIB] [-b STEPS] [-t TESTS] [- OPTIONS: -c CXX use specified c++ compiler -C use cmake + -k keep generated worktree -I integrate into ClickHouse build tree in current directory -i INC add #include -l LIB link against LIB (only for -I or -C) @@ -47,13 +48,14 @@ USE_CLICKHOUSE= CXX=g++ CXX_OPTS= CMD_PARAMS= +KEEP_WORKTREE=0 # # Parse command line # if [ "$1" == "--help" ] || [ -z "$1" ]; then usage; fi -while getopts "vc:CIi:l:bB:t:o:O:g:" OPT; do +while getopts "vc:CIi:l:bkB:t:o:O:g:" OPT; do case "$OPT" in v) set -x; ;; c) CXX="$OPTARG"; ;; @@ -63,6 +65,7 @@ while getopts "vc:CIi:l:bB:t:o:O:g:" OPT; do l) LIBS="$LIBS $OPTARG"; ;; b) if [ "$OPTARG" = perf-top ]; then BENCHMARK_STEPS=-1; RUN_PERFTOP=y; else BENCHMARK_STEPS="$OPTARG"; fi; ;; B) BUILD_DIR="$OPTARG"; ;; + k) KEEP_WORKTREE=1; ;; t) BENCHMARK_TESTS="$OPTARG"; ;; o) OUTPUT_EXECUTABLE="$OPTARG"; ;; O) CXX_OPTS="$CXX_OPTS $OPTARG"; ;; @@ -147,13 +150,17 @@ if [ -n "$USE_CLICKHOUSE" ]; then echo "add_subdirectory ($SUBDIR)" >>$CALL_DIR/CMakeLists.txt cleanup() { mv $CALL_DIR/CMakeLists.txt.backup.$$ $CALL_DIR/CMakeLists.txt - rm -rf $WORKDIR - rm -rf ${BUILD_ROOT}${CLICKHOUSE_PATH} + if [ $KEEP_WORKTREE -eq 0 ]; then + rm -rf $WORKDIR + rm -rf ${BUILD_ROOT}${CLICKHOUSE_PATH} + fi } else WORKDIR=/var/tmp/cppexpr_$$ cleanup() { - rm -rf $WORKDIR + if [ $KEEP_WORKTREE -eq 0 ]; then + rm -rf $WORKDIR + fi } fi From d7f95ddfcf1c6b0f25c273615caf4be42986778c Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 16 May 2024 13:16:01 +0200 Subject: [PATCH 291/651] CI: Enable Arm integration tests job in CI --- .github/PULL_REQUEST_TEMPLATE.md | 9 +- tests/ci/ci_config.py | 139 +++++++++++++++++-------------- 2 files changed, 81 insertions(+), 67 deletions(-) diff --git 
a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3e0131a388a..64dc9049bc2 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -42,25 +42,25 @@ At a minimum, the following information should be added (but add more as needed) > Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/
- Modify your CI run + CI Settings **NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing **NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step -#### Include tests (required builds will be added automatically): -- [ ] Fast test +#### Run these jobs only (required builds will be added automatically): - [ ] Integration Tests - [ ] Stateless tests - [ ] Stateful tests - [ ] Unit tests - [ ] Performance tests +- [ ] All with aarch64 - [ ] All with ASAN - [ ] All with TSAN - [ ] All with Analyzer - [ ] All with Azure - [ ] Add your option here -#### Exclude tests: +#### Deny these jobs: - [ ] Fast test - [ ] Integration Tests - [ ] Stateless tests @@ -72,7 +72,6 @@ At a minimum, the following information should be added (but add more as needed) - [ ] All with UBSAN - [ ] All with Coverage - [ ] All with Aarch64 -- [ ] Add your option here #### Extra options: - [ ] do not test (only style check) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 588f4934125..84041b8782f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -448,9 +448,9 @@ bugfix_validate_check = DigestConfig( ], exclude_files=[".md"], docker=IMAGES.copy() - + [ - "clickhouse/stateless-test", - ], + + [ + "clickhouse/stateless-test", + ], ) # common test params docker_server_job_config = JobConfig( @@ -570,7 +570,7 @@ class CIConfig: if self.is_build_job(job_name): stage_type = CIStages.BUILDS_1 if job_name in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK_SPECIAL + JobNames.BUILD_CHECK_SPECIAL ): # special builds go to Build_2 stage to not delay Builds_1/Test_1 stage_type = CIStages.BUILDS_2 @@ -584,7 +584,7 @@ class CIConfig: required_build = CI_CONFIG.test_configs[job_name].required_build assert required_build if required_build in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK + JobNames.BUILD_CHECK ): stage_type = CIStages.TESTS_1 else: @@ -597,10 +597,10 @@ class CIConfig: def get_job_config(self, check_name: str) -> JobConfig: res = None for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, + self.other_jobs_configs, ): if check_name in config: # type: ignore res = config[check_name].job_config # type: ignore @@ -612,47 +612,47 @@ class CIConfig: if self.is_build_job(check_name) or check_name == JobNames.FAST_TEST: result = Runners.BUILDER elif any( - words in check_name.lower() - for words in [ - "install packages", - "compatibility check", - "docker", - "build check", - "jepsen", - "style check", - ] + words in check_name.lower() + for words in [ + "install packages", + "compatibility check", + "docker", + "build check", + "jepsen", + "style check", + ] ): result = Runners.STYLE_CHECKER elif check_name == JobNames.DOCS_CHECK: # docs job is demanding result = Runners.FUNC_TESTER_ARM elif any( - words in check_name.lower() - for words in [ - "stateless", - "stateful", - "clickbench", - "sqllogic test", - "libfuzzer", - "bugfix validation", - ] + words in check_name.lower() + for words in [ + "stateless", + "stateful", + "clickbench", + "sqllogic test", + "libfuzzer", + "bugfix validation", + ] ): result = Runners.FUNC_TESTER elif any( - words in check_name.lower() - for words in ["stress", "upgrade", "integration", "performance comparison"] + words in check_name.lower() + for words in ["stress", "upgrade", "integration", "performance comparison"] ): result = Runners.STRESS_TESTER elif 
any( - words in check_name.lower() - for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] + words in check_name.lower() + for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] ): result = Runners.FUZZER_UNIT_TESTER assert result, f"BUG, no runner for [{check_name}]" if ( - "aarch" in check_name.lower() or "arm64" in check_name.lower() + "aarch" in check_name.lower() or "arm64" in check_name.lower() ) and "aarch" not in result: if result == Runners.STRESS_TESTER: # FIXME: no arm stress tester group atm @@ -683,10 +683,10 @@ class CIConfig: check_name = self.normalize_string(check_name) for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, + self.other_jobs_configs, ): for job_name in config: # type: ignore if check_name == self.normalize_string(job_name): @@ -714,10 +714,10 @@ class CIConfig: def get_digest_config(self, check_name: str) -> DigestConfig: res = None for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, + self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, ): if check_name in config: # type: ignore res = config[check_name].job_config.digest # type: ignore @@ -732,15 +732,15 @@ class CIConfig: """ assert branch for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, + self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, ): yield from config # type: ignore def get_builds_for_report( - self, report_name: str, release: bool = False, backport: bool = False + self, report_name: str, release: bool = False, backport: bool = False ) -> List[str]: # hack to modify build list for release and bp wf assert not (release and backport), "Invalid input" @@ -1155,16 +1155,20 @@ CI_CONFIG = CIConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_ASAN, + job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_MSAN, + job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, + job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_TSAN, + job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), # End stateful tests for parallel replicas JobNames.STATELESS_TEST_ASAN: TestConfig( @@ -1207,7 +1211,8 @@ CI_CONFIG = CIConfig( ), JobNames.STATELESS_TEST_AZURE_ASAN: TestConfig( Build.PACKAGE_ASAN, - 
job_config=JobConfig(num_batches=4, **statless_test_common_params, release_only=True, run_by_ci_option=True), # type: ignore + job_config=JobConfig(num_batches=4, **statless_test_common_params, release_only=True, + run_by_ci_option=True), # type: ignore ), JobNames.STATELESS_TEST_S3_TSAN: TestConfig( Build.PACKAGE_TSAN, @@ -1220,28 +1225,39 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) + # type: ignore ), JobNames.STRESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, + job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) + # type: ignore ), JobNames.UPGRADE_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore + Build.PACKAGE_ASAN, + job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) + # type: ignore ), JobNames.STRESS_TEST_AZURE_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore + Build.PACKAGE_TSAN, + job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore ), JobNames.STRESS_TEST_AZURE_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore + Build.PACKAGE_MSAN, + job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore ), JobNames.UPGRADE_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore + Build.PACKAGE_TSAN, + job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) + # type: ignore ), JobNames.UPGRADE_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore + Build.PACKAGE_MSAN, + job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) + # type: ignore ), JobNames.UPGRADE_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore @@ -1260,8 +1276,7 @@ CI_CONFIG = CIConfig( ), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - # add [run_by_label="test arm"] to not run in regular pr workflow by default - job_config=JobConfig(num_batches=6, **integration_test_common_params, run_by_label="test arm"), # type: ignore + job_config=JobConfig(num_batches=5, **integration_test_common_params), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( 
Build.PACKAGE_RELEASE, @@ -1335,7 +1350,8 @@ CI_CONFIG = CIConfig( ), JobNames.PERFORMANCE_TEST_ARM64: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), + # type: ignore ), JobNames.SQLANCER: TestConfig( Build.PACKAGE_RELEASE, job_config=sqllancer_test_common_params @@ -1365,7 +1381,6 @@ CI_CONFIG = CIConfig( ) CI_CONFIG.validate() - # checks required by Mergeable Check REQUIRED_CHECKS = [ "PR Check", From 87f3d9103dbce17c82321656a50c4bd80583ca66 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 16 May 2024 13:49:47 +0200 Subject: [PATCH 292/651] fix 02124_insert_deduplication_token_multiple_blocks --- .../02124_insert_deduplication_token_multiple_blocks.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks.sh b/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks.sh index 04ccbda6235..9a7ac2007f1 100755 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks.sh +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks.sh @@ -9,6 +9,7 @@ INSERT_BLOCK_SETTINGS="max_insert_block_size=1&min_insert_block_size_rows=0&min_ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS block_dedup_token SYNC" $CLICKHOUSE_CLIENT --query="CREATE TABLE block_dedup_token (id Int32) ENGINE=MergeTree() ORDER BY id SETTINGS non_replicated_deduplication_window=0xFFFFFFFF;" +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES block_dedup_token;" $CLICKHOUSE_CLIENT --query="SELECT 'insert 2 blocks with dedup token, 1 row per block'" DEDUP_TOKEN='dedup1' From 22573361de3c4cdbd105e47856f00d1411d081e8 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 16 May 2024 13:58:19 +0200 Subject: [PATCH 293/651] fixing typos and var names --- tests/ci/ci.py | 34 +++---- tests/ci/ci_config.py | 195 +++++++++++++++++------------------- tests/ci/test_ci_options.py | 6 +- 3 files changed, 110 insertions(+), 125 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 08048564383..3ed584f5d93 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -71,12 +71,12 @@ class PendingState: class CiCache: """ CI cache is a bunch of records. Record is a file stored under special location on s3. 
- The file name has following format + The file name has the following format _[]--___.ci RECORD_TYPE: - SUCCESSFUL - for successfuly finished jobs + SUCCESSFUL - for successfully finished jobs PENDING - for pending jobs ATTRIBUTES: @@ -508,7 +508,7 @@ class CiCache: self, job: str, batch: int, num_batches: int, release_branch: bool ) -> bool: """ - checks if a given job have already been done successfuly + checks if a given job have already been done successfully """ return self.exist( self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch @@ -749,7 +749,7 @@ class CiOptions: # list of specified jobs to run ci_jobs: Optional[List[str]] = None - # btaches to run for all multi-batch jobs + # batches to run for all multi-batch jobs job_batches: Optional[List[int]] = None do_not_test: bool = False @@ -903,7 +903,7 @@ class CiOptions: if self.ci_sets: for tag in self.ci_sets: label_config = CI_CONFIG.get_label_config(tag) - assert label_config, f"Unknonwn tag [{tag}]" + assert label_config, f"Unknown tag [{tag}]" print( f"NOTE: CI Set's tag: [{tag}], add jobs: [{label_config.run_jobs}]" ) @@ -953,7 +953,7 @@ class CiOptions: jobs_params[job] = { "batches": list(range(num_batches)), "num_batches": num_batches, - "run_if_ci_option_include_set": job_config.run_by_ci_option + "run_by_ci_option": job_config.run_by_ci_option and pr_info.is_pr, } @@ -969,7 +969,7 @@ class CiOptions: for job in jobs_to_do[:]: job_param = jobs_params[job] if ( - job_param["run_if_ci_option_include_set"] + job_param["run_by_ci_option"] and job not in jobs_to_do_requested ): print( @@ -1010,7 +1010,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: parser.add_argument( "--pre", action="store_true", - help="Action that executes prerequesetes for the job provided in --job-name", + help="Action that executes prerequisites for the job provided in --job-name", ) parser.add_argument( "--run", @@ -1080,7 +1080,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: "--skip-jobs", action="store_true", default=False, - help="skip fetching data about job runs, used in --configure action (for debugging and nigthly ci)", + help="skip fetching data about job runs, used in --configure action (for debugging and nightly ci)", ) parser.add_argument( "--force", @@ -1298,7 +1298,7 @@ def _configure_docker_jobs(docker_digest_or_latest: bool) -> Dict: missing_amd64 = [] missing_aarch64 = [] if not docker_digest_or_latest: - # look for missing arm and amd images only among missing multiarch manifests @missing_multi_dict + # look for missing arm and amd images only among missing multi-arch manifests @missing_multi_dict # to avoid extra dockerhub api calls missing_amd64 = list( check_missing_images_on_dockerhub(missing_multi_dict, "amd64") @@ -1396,7 +1396,7 @@ def _configure_jobs( ): continue - # fill job randomization buckets (for jobs with configured @random_bucket property)) + # fill job randomization buckets (for jobs with configured @random_bucket property) if job_config.random_bucket: if not job_config.random_bucket in randomization_buckets: randomization_buckets[job_config.random_bucket] = set() @@ -1445,7 +1445,7 @@ def _configure_jobs( jobs_params[job] = { "batches": batches_to_do, "num_batches": num_batches, - "run_if_ci_option_include_set": job_config.run_by_ci_option + "run_by_ci_option": job_config.run_by_ci_option and pr_info.is_pr, } elif add_to_skip: @@ -1490,8 +1490,8 @@ def _configure_jobs( def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, 
Any]]: """ populates GH Actions' workflow with real jobs - "Builds_1": [{"job_name": NAME, "runner_type": RUNER_TYPE}] - "Tests_1": [{"job_name": NAME, "runner_type": RUNER_TYPE}] + "Builds_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] + "Tests_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] ... """ result = {} # type: Dict[str, Any] @@ -1582,7 +1582,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: for match in matches if match in CILabels or match.startswith("job_") or match.startswith("batch_") ] - print(f"CI modifyers from commit message: [{res}]") + print(f"CI modifiers from commit message: [{res}]") res_2 = [] if pr_info.is_pr: matches = [match[-1] for match in re.findall(pattern, pr_info.body)] @@ -1593,7 +1593,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: or match.startswith("job_") or match.startswith("batch_") ] - print(f"CI modifyers from PR body: [{res_2}]") + print(f"CI modifiers from PR body: [{res_2}]") return list(set(res + res_2)) @@ -1659,7 +1659,7 @@ def _upload_build_artifacts( report_url = ci_cache.upload_build_report(build_result) print(f"Report file has been uploaded to [{report_url}]") - # Upload head master binaries + # Upload master head's binaries static_bin_name = CI_CONFIG.build_config[build_name].static_binary_name if pr_info.is_master and static_bin_name: # Full binary with debug info: diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 84041b8782f..dc67e05455c 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -50,9 +50,9 @@ class CILabels(metaclass=WithIter): CI_SET_ARM = "ci_set_arm" CI_SET_INTEGRATION = "ci_set_integration" CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" - CI_SET_STATLESS = "ci_set_stateless" + CI_SET_STATELESS = "ci_set_stateless" CI_SET_STATEFUL = "ci_set_stateful" - CI_SET_STATLESS_ASAN = "ci_set_stateless_asan" + CI_SET_STATELESS_ASAN = "ci_set_stateless_asan" CI_SET_STATEFUL_ASAN = "ci_set_stateful_asan" libFuzzer = "libFuzzer" @@ -203,7 +203,7 @@ class DigestConfig: include_paths: List[Union[str, Path]] = field(default_factory=list) # file suffixes to exclude from digest exclude_files: List[str] = field(default_factory=list) - # directories to exlude from digest + # directories to exclude from digest exclude_dirs: List[Union[str, Path]] = field(default_factory=list) # docker names to include into digest docker: List[str] = field(default_factory=list) @@ -214,7 +214,7 @@ class DigestConfig: @dataclass class LabelConfig: """ - configures different CI scenarious per GH label + configures different CI scenarios per GH label """ run_jobs: Iterable[str] = frozenset() @@ -228,7 +228,7 @@ class JobConfig: # configures digest calculation for the job digest: DigestConfig = field(default_factory=DigestConfig) - # will be triggered for the job if omited in CI workflow yml + # will be triggered for the job if omitted in CI workflow yml run_command: str = "" # job timeout, seconds timeout: Optional[int] = None @@ -239,7 +239,7 @@ class JobConfig: # to run always regardless of the job digest or/and label run_always: bool = False # if the job needs to be run on the release branch, including master (e.g. building packages, docker server). - # NOTE: Subsequent runs on the same branch with the similar digest are still considered skippable. + # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. 
required_on_release_branch: bool = False # job is for pr workflow only pr_only: bool = False @@ -448,9 +448,9 @@ bugfix_validate_check = DigestConfig( ], exclude_files=[".md"], docker=IMAGES.copy() - + [ - "clickhouse/stateless-test", - ], + + [ + "clickhouse/stateless-test", + ], ) # common test params docker_server_job_config = JobConfig( @@ -467,7 +467,7 @@ compatibility_test_common_params = { "digest": compatibility_check_digest, "run_command": "compatibility_check.py", } -statless_test_common_params = { +stateless_test_common_params = { "digest": stateless_check_digest, "run_command": 'functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT', "timeout": 10800, @@ -570,7 +570,7 @@ class CIConfig: if self.is_build_job(job_name): stage_type = CIStages.BUILDS_1 if job_name in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK_SPECIAL + JobNames.BUILD_CHECK_SPECIAL ): # special builds go to Build_2 stage to not delay Builds_1/Test_1 stage_type = CIStages.BUILDS_2 @@ -584,7 +584,7 @@ class CIConfig: required_build = CI_CONFIG.test_configs[job_name].required_build assert required_build if required_build in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK + JobNames.BUILD_CHECK ): stage_type = CIStages.TESTS_1 else: @@ -597,10 +597,10 @@ class CIConfig: def get_job_config(self, check_name: str) -> JobConfig: res = None for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, + self.other_jobs_configs, ): if check_name in config: # type: ignore res = config[check_name].job_config # type: ignore @@ -612,47 +612,47 @@ class CIConfig: if self.is_build_job(check_name) or check_name == JobNames.FAST_TEST: result = Runners.BUILDER elif any( - words in check_name.lower() - for words in [ - "install packages", - "compatibility check", - "docker", - "build check", - "jepsen", - "style check", - ] + words in check_name.lower() + for words in [ + "install packages", + "compatibility check", + "docker", + "build check", + "jepsen", + "style check", + ] ): result = Runners.STYLE_CHECKER elif check_name == JobNames.DOCS_CHECK: # docs job is demanding result = Runners.FUNC_TESTER_ARM elif any( - words in check_name.lower() - for words in [ - "stateless", - "stateful", - "clickbench", - "sqllogic test", - "libfuzzer", - "bugfix validation", - ] + words in check_name.lower() + for words in [ + "stateless", + "stateful", + "clickbench", + "sqllogic test", + "libfuzzer", + "bugfix validation", + ] ): result = Runners.FUNC_TESTER elif any( - words in check_name.lower() - for words in ["stress", "upgrade", "integration", "performance comparison"] + words in check_name.lower() + for words in ["stress", "upgrade", "integration", "performance comparison"] ): result = Runners.STRESS_TESTER elif any( - words in check_name.lower() - for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] + words in check_name.lower() + for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] ): result = Runners.FUZZER_UNIT_TESTER assert result, f"BUG, no runner for [{check_name}]" if ( - "aarch" in check_name.lower() or "arm64" in check_name.lower() + "aarch" in check_name.lower() or "arm64" in check_name.lower() ) and "aarch" not in result: if result == Runners.STRESS_TESTER: # FIXME: no arm stress tester group atm @@ -661,7 +661,7 @@ class CIConfig: # crosscompile - no arm required pass else: - # switch to aarch64 runnner + # switch to aarch64 runner result += "-aarch64" return 
result @@ -683,10 +683,10 @@ class CIConfig: check_name = self.normalize_string(check_name) for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, + self.other_jobs_configs, ): for job_name in config: # type: ignore if check_name == self.normalize_string(job_name): @@ -708,16 +708,16 @@ class CIConfig: break assert ( res - ), f"Error: Experimantal feature... Invlid request or not supported job [{check_name}]" + ), f"Error: Experimental feature... Invalid request or not supported job [{check_name}]" return res def get_digest_config(self, check_name: str) -> DigestConfig: res = None for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, + self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, ): if check_name in config: # type: ignore res = config[check_name].job_config.digest # type: ignore @@ -732,15 +732,15 @@ class CIConfig: """ assert branch for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, + self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, ): yield from config # type: ignore def get_builds_for_report( - self, report_name: str, release: bool = False, backport: bool = False + self, report_name: str, release: bool = False, backport: bool = False ) -> List[str]: # hack to modify build list for release and bp wf assert not (release and backport), "Invalid input" @@ -811,16 +811,16 @@ class CIConfig: f"The following names of the build report '{build_report_name}' " f"are missed in build_config: {missed_names}", ) - # And finally, all of tests' requirements must be in the builds + # And finally, all tests' requirements must be in the builds for test_name, test_config in self.test_configs.items(): if test_config.required_build not in self.build_config.keys(): logging.error( - "The requierment '%s' for '%s' is not found in builds", + "The requirement '%s' for '%s' is not found in builds", test_config, test_name, ) errors.append( - f"The requierment '{test_config}' for " + f"The requirement '{test_config}' for " f"'{test_name}' is not found in builds" ) @@ -861,7 +861,7 @@ CI_CONFIG = CIConfig( JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, ] ), - CILabels.CI_SET_STATLESS: LabelConfig( + CILabels.CI_SET_STATELESS: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, JobNames.FAST_TEST, @@ -869,7 +869,7 @@ CI_CONFIG = CIConfig( JobNames.STATELESS_TEST_RELEASE, ] ), - CILabels.CI_SET_STATLESS_ASAN: LabelConfig( + CILabels.CI_SET_STATELESS_ASAN: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, JobNames.FAST_TEST, @@ -1155,68 +1155,63 @@ CI_CONFIG = CIConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( - Build.PACKAGE_MSAN, - job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), 
JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, - job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), # End stateful tests for parallel replicas JobNames.STATELESS_TEST_ASAN: TestConfig( Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_TSAN: TestConfig( Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_MSAN: TestConfig( Build.PACKAGE_MSAN, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_UBSAN: TestConfig( Build.PACKAGE_UBSAN, - job_config=JobConfig(num_batches=2, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=2, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore + Build.PACKAGE_RELEASE, job_config=JobConfig(**stateless_test_common_params) # type: ignore ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: TestConfig( Build.PACKAGE_RELEASE_COVERAGE, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_AARCH64: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore + Build.PACKAGE_AARCH64, job_config=JobConfig(**stateless_test_common_params) # type: ignore ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: TestConfig( Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_AZURE_ASAN: TestConfig( Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **statless_test_common_params, release_only=True, - run_by_ci_option=True), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params, release_only=True, run_by_ci_option=True), # type: ignore ), JobNames.STATELESS_TEST_S3_TSAN: TestConfig( Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, 
**statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STRESS_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore @@ -1225,39 +1220,28 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) - # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, - job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) - # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) - # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.STRESS_TEST_AZURE_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore ), JobNames.STRESS_TEST_AZURE_MSAN: TestConfig( - Build.PACKAGE_MSAN, - job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore ), JobNames.UPGRADE_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) - # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, - job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) - # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore @@ -1276,7 +1260,8 @@ CI_CONFIG = CIConfig( ), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=5, **integration_test_common_params), # type: ignore + # add [run_by_label="test arm"] to not run in regular pr workflow by default + job_config=JobConfig(num_batches=6, **integration_test_common_params, run_by_label="test arm"), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( Build.PACKAGE_RELEASE, @@ -1330,7 +1315,7 @@ CI_CONFIG = CIConfig( 
JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig( # replace to non-default Build.PACKAGE_ASAN, - job_config=JobConfig(pr_only=True, **{**statless_test_common_params, "timeout": 3600}), # type: ignore + job_config=JobConfig(pr_only=True, **{**stateless_test_common_params, "timeout": 3600}), # type: ignore ), JobNames.JEPSEN_KEEPER: TestConfig( Build.BINARY_RELEASE, @@ -1350,8 +1335,7 @@ CI_CONFIG = CIConfig( ), JobNames.PERFORMANCE_TEST_ARM64: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), - # type: ignore + job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore ), JobNames.SQLANCER: TestConfig( Build.PACKAGE_RELEASE, job_config=sqllancer_test_common_params @@ -1381,6 +1365,7 @@ CI_CONFIG = CIConfig( ) CI_CONFIG.validate() + # checks required by Mergeable Check REQUIRED_CHECKS = [ "PR Check", @@ -1479,7 +1464,7 @@ CHECK_DESCRIPTIONS = [ "Checks if new added or modified tests are flaky by running them repeatedly, " "in parallel, with more randomization. Functional tests are run 100 times " "with address sanitizer, and additional randomization of thread scheduling. " - "Integrational tests are run up to 10 times. If at least once a new test has " + "Integration tests are run up to 10 times. If at least once a new test has " "failed, or was too long, this check will be red. We don't allow flaky tests, " 'read the doc', @@ -1569,7 +1554,7 @@ CHECK_DESCRIPTIONS = [ lambda x: x.startswith("ClickBench"), ), CheckDescription( - "Falback for unknown", + "Fallback for unknown", "There's no description for the check yet, please add it to " "tests/ci/ci_config.py:CHECK_DESCRIPTIONS", lambda x: True, diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 0f10f7d4f85..c07c094d439 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -161,7 +161,7 @@ class TestCIOptions(unittest.TestCase): "Stateless tests (azure, asan)": { "batches": list(range(3)), "num_batches": 3, - "run_if_ci_option_include_set": True, + "run_by_ci_option": True, } } jobs_to_do, jobs_to_skip, job_params = ci_options.apply( @@ -226,10 +226,10 @@ class TestCIOptions(unittest.TestCase): job_params[job] = { "batches": list(range(3)), "num_batches": 3, - "run_if_ci_option_include_set": "azure" in job, + "run_by_ci_option": "azure" in job, } else: - job_params[job] = {"run_if_ci_option_include_set": False} + job_params[job] = {"run_by_ci_option": False} jobs_to_do, jobs_to_skip, job_params = ci_options.apply( jobs_to_do, jobs_to_skip, job_params, PRInfo() From 2fe684da0917dfca12bce6fa215bd566370d9db5 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 16 May 2024 14:51:04 +0200 Subject: [PATCH 294/651] Add dynamic tests --- .../03150_dynamic_type_mv_insert.reference | 35 ++ .../03150_dynamic_type_mv_insert.sql | 34 ++ ...151_dynamic_type_scale_max_types.reference | 26 ++ .../03151_dynamic_type_scale_max_types.sql | 23 ++ .../03152_dynamic_type_simple.reference | 25 ++ .../0_stateless/03152_dynamic_type_simple.sql | 29 ++ .../03153_dynamic_type_empty.reference | 15 + .../0_stateless/03153_dynamic_type_empty.sql | 5 + ..._dynamic_type_concurrent_inserts.reference | 7 + .../03156_dynamic_type_concurrent_inserts.sh | 21 ++ .../03157_dynamic_type_json.reference | 5 + .../0_stateless/03157_dynamic_type_json.sql | 13 + .../03158_dynamic_type_from_variant.reference | 17 + .../03158_dynamic_type_from_variant.sql | 15 + .../03159_dynamic_type_all_types.reference 
| 300 ++++++++++++++++++ .../03159_dynamic_type_all_types.sql | 99 ++++++ .../03160_dynamic_type_agg.reference | 1 + .../0_stateless/03160_dynamic_type_agg.sql | 10 + .../03162_dynamic_type_nested.reference | 4 + .../0_stateless/03162_dynamic_type_nested.sql | 16 + 20 files changed, 700 insertions(+) create mode 100644 tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference create mode 100644 tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql create mode 100644 tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference create mode 100644 tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql create mode 100644 tests/queries/0_stateless/03152_dynamic_type_simple.reference create mode 100644 tests/queries/0_stateless/03152_dynamic_type_simple.sql create mode 100644 tests/queries/0_stateless/03153_dynamic_type_empty.reference create mode 100644 tests/queries/0_stateless/03153_dynamic_type_empty.sql create mode 100644 tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference create mode 100755 tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh create mode 100644 tests/queries/0_stateless/03157_dynamic_type_json.reference create mode 100644 tests/queries/0_stateless/03157_dynamic_type_json.sql create mode 100644 tests/queries/0_stateless/03158_dynamic_type_from_variant.reference create mode 100644 tests/queries/0_stateless/03158_dynamic_type_from_variant.sql create mode 100644 tests/queries/0_stateless/03159_dynamic_type_all_types.reference create mode 100644 tests/queries/0_stateless/03159_dynamic_type_all_types.sql create mode 100644 tests/queries/0_stateless/03160_dynamic_type_agg.reference create mode 100644 tests/queries/0_stateless/03160_dynamic_type_agg.sql create mode 100644 tests/queries/0_stateless/03162_dynamic_type_nested.reference create mode 100644 tests/queries/0_stateless/03162_dynamic_type_nested.sql diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference new file mode 100644 index 00000000000..0b76d30953e --- /dev/null +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference @@ -0,0 +1,35 @@ +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +3 1 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 Decimal(18, 3) +3 1 String +3 1 String +4 2 String +4 2 String + +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +4 2 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 Decimal(18, 3) +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +3 1 String +4 2 String +4 2 String +4 2 String diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql new file mode 100644 index 00000000000..ad5ea9512c6 --- /dev/null +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql @@ -0,0 +1,34 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE null_table +( + n1 UInt8, + n2 Dynamic(max_types=3) +) +ENGINE = Null; + +CREATE MATERIALIZED VIEW dummy_rmv TO to_table +AS SELECT * FROM null_table; + +CREATE TABLE to_table +( + n1 UInt8, + n2 Dynamic(max_types=4) +) +ENGINE = MergeTree ORDER BY n1; + +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM 
to_table ORDER BY ALL; + +select ''; +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=10); +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference new file mode 100644 index 00000000000..d96fbf658d8 --- /dev/null +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference @@ -0,0 +1,26 @@ +1 2024-01-01 Date +2 1704056400 String +3 1 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 String +3 1 Float32 +3 1 String +4 2 Float64 +4 2 String + +1 2024-01-01 String +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 String +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +3 1 String +4 2 String +4 2 String +4 2 String diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql new file mode 100644 index 00000000000..04322fc4f0c --- /dev/null +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql @@ -0,0 +1,23 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE to_table +( + n1 UInt8, + n2 Dynamic(max_types=2) +) +ENGINE = MergeTree ORDER BY n1; + +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=5); +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=500); -- { serverError UNEXPECTED_AST_STRUCTURE } diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.reference b/tests/queries/0_stateless/03152_dynamic_type_simple.reference new file mode 100644 index 00000000000..5f243209ff3 --- /dev/null +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.reference @@ -0,0 +1,25 @@ +string1 String +42 Int64 +3.14 Float64 +[1,2] Array(Int64) +2021-01-01 Date +string2 String + +\N None 42 Int64 +42 Int64 string String +string String [1, 2] String +[1,2] Array(Int64) \N None + ┌─d────────────────────────┬─dynamicType(d)─┬─d.Int64─┬─d.String─┬─────d.Date─┬─d.Float64─┬──────────d.DateTime─┬─d.Array(Int64)─┬─d.Array(String)──────────┐ + 1. │ 42 │ Int64 │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 2. │ string1 │ String │ ᴺᵁᴸᴸ │ string1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 3. 
│ 2021-01-01 │ Date │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2021-01-01 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 4. │ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ [] │ + 5. │ 3.14 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 3.14 │ ᴺᵁᴸᴸ │ [] │ [] │ + 6. │ string2 │ String │ ᴺᵁᴸᴸ │ string2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 7. │ 2021-01-01 12:00:00 │ DateTime │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2021-01-01 12:00:00 │ [] │ [] │ + 8. │ ['array','of','strings'] │ Array(String) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ['array','of','strings'] │ + 9. │ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ +10. │ 42.42 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ [] │ + └──────────────────────────┴────────────────┴─────────┴──────────┴────────────┴───────────┴─────────────────────┴────────────────┴──────────────────────────┘ + +49995000 diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.sql b/tests/queries/0_stateless/03152_dynamic_type_simple.sql new file mode 100644 index 00000000000..fd5328faf15 --- /dev/null +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.sql @@ -0,0 +1,29 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE test_max_types (d Dynamic(max_types=5)) ENGINE = Memory; +INSERT INTO test_max_types VALUES ('string1'), (42), (3.14), ([1, 2]), (toDate('2021-01-01')), ('string2'); +SELECT d, dynamicType(d) FROM test_max_types; + +SELECT ''; +CREATE TABLE test_nested_dynamic (d1 Dynamic, d2 Dynamic(max_types=2)) ENGINE = Memory; +INSERT INTO test_nested_dynamic VALUES (NULL, 42), (42, 'string'), ('string', [1, 2]), ([1, 2], NULL); +SELECT d1, dynamicType(d1), d2, dynamicType(d2) FROM test_nested_dynamic; + +CREATE TABLE test_rapid_schema (d Dynamic) ENGINE = Memory; +INSERT INTO test_rapid_schema VALUES (42), ('string1'), (toDate('2021-01-01')), ([1, 2, 3]), (3.14), ('string2'), (toDateTime('2021-01-01 12:00:00')), (['array', 'of', 'strings']), (NULL), (toFloat64(42.42)); + +SELECT d, dynamicType(d), d.Int64, d.String, d.Date, d.Float64, d.DateTime, d.`Array(Int64)`, d.`Array(String)` +FROM test_rapid_schema FORMAT PrettyCompactMonoBlock; + + +SELECT ''; +SELECT finalizeAggregation(CAST(dynamic_state, 'AggregateFunction(sum, UInt64)')) +FROM +( + SELECT CAST(state, 'Dynamic') AS dynamic_state + FROM + ( + SELECT sumState(number) AS state + FROM numbers(10000) + ) +); diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.reference b/tests/queries/0_stateless/03153_dynamic_type_empty.reference new file mode 100644 index 00000000000..f7c047dcd19 --- /dev/null +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.reference @@ -0,0 +1,15 @@ +[] String +[1] Array(Int64) +[] Array(Int64) +['1'] Array(String) +[] Array(Int64) +() String +(1) Tuple(Int64) +(0) Tuple(Int64) +('1') Tuple(String) +(0) Tuple(Int64) +{} String +{1:2} Map(Int64, Int64) +{} Map(Int64, Int64) +{'1':'2'} Map(String, String) +{} Map(Int64, Int64) diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.sql b/tests/queries/0_stateless/03153_dynamic_type_empty.sql new file mode 100644 index 00000000000..8e942fe6f6e --- /dev/null +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.sql @@ -0,0 +1,5 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE test_null_empty (d Dynamic) ENGINE = Memory; +INSERT INTO test_null_empty VALUES ([]), ([1]), ([]), (['1']), ([]), (()),((1)), (()), (('1')), (()), ({}), ({1:2}), ({}), ({'1':'2'}), ({}); +SELECT d, dynamicType(d) FROM test_null_empty; diff --git a/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference 
b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference new file mode 100644 index 00000000000..e1c7b69b136 --- /dev/null +++ b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference @@ -0,0 +1,7 @@ +Array(UInt64) 12000 10000 +Date 12000 10001 +Float64 12000 10000 +Int64 10000 10000 +Map(UInt64, String) 10000 10000 +String 10000 10000 +UInt64 4000 4000 diff --git a/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh new file mode 100755 index 00000000000..d7709b722c9 --- /dev/null +++ b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "CREATE TABLE test_cc (d Dynamic) ENGINE = Memory" + + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT number::Int64 AS d FROM numbers(10000) SETTINGS max_threads=1,max_insert_threads=1" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toString(number) AS d FROM numbers(10000) SETTINGS max_threads=2,max_insert_threads=2" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toDate(number % 10000) AS d FROM numbers(10000) SETTINGS max_threads=3,max_insert_threads=3" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT [number, number + 1] AS d FROM numbers(10000) SETTINGS max_threads=4,max_insert_threads=4" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toFloat64(number) AS d FROM numbers(10000) SETTINGS max_threads=5,max_insert_threads=5" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT map(number, toString(number)) AS d FROM numbers(10000) SETTINGS max_threads=6,max_insert_threads=6" & + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --use_variant_as_common_type=1 --allow_experimental_variant_type=1 -q "INSERT INTO test_cc SELECT CAST(multiIf(number % 5 = 0, toString(number), number % 5 = 1, number, number % 5 = 2, toFloat64(number), number % 5 = 3, toDate('2020-01-01'), [number, number + 1]), 'Dynamic') FROM numbers(10000) SETTINGS max_threads=6,max_insert_threads=6" & + +wait + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "SELECT dynamicType(d) t, count(), uniqExact(d) FROM test_cc GROUP BY t ORDER BY t" diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.reference b/tests/queries/0_stateless/03157_dynamic_type_json.reference new file mode 100644 index 00000000000..38bca12bb95 --- /dev/null +++ b/tests/queries/0_stateless/03157_dynamic_type_json.reference @@ -0,0 +1,5 @@ +1 (((((((((('deep_value')))))))))) +2 (((((((((('deep_array_value')))))))))) + +(((((((((('deep_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) +(((((((((('deep_array_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.sql b/tests/queries/0_stateless/03157_dynamic_type_json.sql new file mode 100644 index 00000000000..cb1a5987104 --- /dev/null +++ 
b/tests/queries/0_stateless/03157_dynamic_type_json.sql @@ -0,0 +1,13 @@ +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; + +CREATE TABLE test_deep_nested_json (i UInt16, d JSON) ENGINE = Memory; + +INSERT INTO test_deep_nested_json VALUES (1, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": "deep_value"}}}}}}}}}}'); +INSERT INTO test_deep_nested_json VALUES (2, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": "deep_array_value"}}}}}}}}}}'); + +SELECT * FROM test_deep_nested_json ORDER BY i; + +SELECT ''; +SELECT d::Dynamic d1, dynamicType(d1) FROM test_deep_nested_json ORDER BY i; diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference b/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference new file mode 100644 index 00000000000..2ede006cedc --- /dev/null +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference @@ -0,0 +1,17 @@ +false Variant(Bool, DateTime64(3), IPv6, String, UInt32) +false Variant(Bool, DateTime64(3), IPv6, String, UInt32) +true Variant(Bool, DateTime64(3), IPv6, String, UInt32) +2001-01-01 01:01:01.111 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +s Variant(Bool, DateTime64(3), IPv6, String, UInt32) +0 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +1 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +\N Variant(Bool, DateTime64(3), IPv6, String, UInt32) + +false Bool +false Bool +true Bool +2001-01-01 01:01:01.111 DateTime64(3) +s String +0 UInt32 +1 UInt32 +\N None diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql new file mode 100644 index 00000000000..20a9e17a148 --- /dev/null +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql @@ -0,0 +1,15 @@ +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; + +CREATE TABLE test_variable (v Variant(String, UInt32, IPv6, Bool, DateTime64)) ENGINE = Memory; +CREATE TABLE test_dynamic (d Dynamic) ENGINE = Memory; + +INSERT INTO test_variable VALUES (1), ('s'), (0), ('0'), ('true'), ('false'), ('2001-01-01 01:01:01.111'), (NULL); + +SELECT v, toTypeName(v) FROM test_variable ORDER BY v; + +INSERT INTO test_dynamic SELECT * FROM test_variable; + +SELECT ''; +SELECT d, dynamicType(d) FROM test_dynamic ORDER BY d; diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference new file mode 100644 index 00000000000..a162ec4f857 --- /dev/null +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -0,0 +1,300 @@ +Array(Dynamic) [] +Array(Array(Dynamic)) [[]] +Array(Array(Array(Dynamic))) [[[]]] +Bool false +Bool true +Date 2022-01-01 +Date32 2022-01-01 +DateTime 2022-01-01 01:01:01 +DateTime64(3) 2022-01-01 01:01:01.011 +Decimal(9, 1) -99999999.9 +Decimal(18, 2) -999999999.99 +Decimal(38, 3) -999999999.999 +Decimal(76, 4) -999999999.9999 +Float32 -inf +Float32 -inf +Float32 -inf +Float32 -3.4028233e38 +Float32 -1.1754942e-38 +Float32 -1e-45 +Float32 1e-45 +Float32 1.1754942e-38 +Float32 3.4028233e38 +Float32 inf +Float32 inf +Float32 inf +Float32 nan +Float32 nan +Float32 nan +Float64 -inf +Float64 -inf +Float64 -inf +Float64 -1.7976931348623157e308 +Float64 -3.40282347e38 +Float64 -1.1754943499999998e-38 
+Float64 -1.3999999999999999e-45 +Float64 -2.2250738585072014e-308 +Float64 2.2250738585072014e-308 +Float64 1.3999999999999999e-45 +Float64 1.1754943499999998e-38 +Float64 3.40282347e38 +Float64 1.7976931348623157e308 +Float64 inf +Float64 inf +Float64 inf +Float64 nan +Float64 nan +Float64 nan +FixedString(1) 1 +FixedString(2) 1\0 +FixedString(10) 1\0\0\0\0\0\0\0\0\0 +IPv4 192.168.0.1 +IPv6 ::1 +Int8 -128 +Int8 -128 +Int8 -127 +Int8 -127 +Int8 -1 +Int8 -1 +Int8 0 +Int8 0 +Int8 1 +Int8 1 +Int8 126 +Int8 126 +Int8 127 +Int8 127 +Int16 -32768 +Int16 -32767 +Int16 -1 +Int16 0 +Int16 1 +Int16 32766 +Int16 32767 +Int32 -2147483648 +Int32 -2147483647 +Int32 -1 +Int32 0 +Int32 1 +Int32 2147483646 +Int32 2147483647 +Int64 -9223372036854775808 +Int64 -9223372036854775807 +Int64 -1 +Int64 0 +Int64 1 +Int64 9223372036854775806 +Int64 9223372036854775807 +Int128 -170141183460469231731687303715884105728 +Int128 -170141183460469231731687303715884105727 +Int128 -1 +Int128 0 +Int128 1 +Int128 170141183460469231731687303715884105726 +Int128 170141183460469231731687303715884105727 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819968 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819967 +Int256 -1 +Int256 0 +Int256 1 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819966 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819967 +IntervalDay 1 +IntervalYear 3 +IntervalMonth 2 +LowCardinality(String) 1 +LowCardinality(String) 1 +LowCardinality(UInt16) 0 +MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] +Map(Dynamic, Dynamic) {'11':'v1','22':'1'} +Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] +Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] +Object(\'json\') {"1":"2"} +Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":null,"k1":1,"k2":2} +Object(Nullable(\'json\')) {"1":2,"2":3,"2020-10-10":null,"k1":null,"k2":null} +Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":"foo","k1":null,"k2":null} +Point (1.23,4.5600000000000005) +Ring [(1.23,4.5600000000000005),(2.34,5.67)] +String string +SimpleAggregateFunction(anyLast, Array(Int16)) [1,2] +Tuple(Dynamic) ('') +Tuple(Tuple(Dynamic)) (('')) +Tuple(Tuple(Tuple(Dynamic))) (((''))) +UUID 00000000-0000-0000-0000-000000000000 +UUID dededdb6-7835-4ce4-8d11-b5de6f2820e9 +UInt8 0 +UInt8 1 +UInt8 254 +UInt8 255 +UInt16 0 +UInt16 1 +UInt16 65534 +UInt16 65535 +UInt32 0 +UInt32 1 +UInt32 4294967294 +UInt32 4294967295 +UInt64 0 +UInt64 1 +UInt64 18446744073709551614 +UInt64 18446744073709551615 +UInt128 0 +UInt128 1 +UInt128 340282366920938463463374607431768211454 +UInt128 340282366920938463463374607431768211455 +UInt256 0 +UInt256 1 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 + +Array(Dynamic) [] +Array(Array(Dynamic)) [[]] +Array(Array(Array(Dynamic))) [[[]]] +Bool false +Bool true +Date 2022-01-01 +Date32 2022-01-01 +DateTime 2022-01-01 01:01:01 +DateTime64(3) 2022-01-01 01:01:01.011 +Decimal(9, 1) -99999999.9 +Decimal(18, 2) -999999999.99 +Decimal(38, 3) -999999999.999 +Decimal(76, 4) -999999999.9999 +Float32 -inf +Float32 -inf +Float32 -inf +Float32 -3.4028233e38 +Float32 -1.1754942e-38 +Float32 -1e-45 +Float32 1e-45 
+Float32 1.1754942e-38 +Float32 3.4028233e38 +Float32 inf +Float32 inf +Float32 inf +Float32 nan +Float32 nan +Float32 nan +Float64 -inf +Float64 -inf +Float64 -inf +Float64 -1.7976931348623157e308 +Float64 -3.40282347e38 +Float64 -1.1754943499999998e-38 +Float64 -1.3999999999999999e-45 +Float64 -2.2250738585072014e-308 +Float64 2.2250738585072014e-308 +Float64 1.3999999999999999e-45 +Float64 1.1754943499999998e-38 +Float64 3.40282347e38 +Float64 1.7976931348623157e308 +Float64 inf +Float64 inf +Float64 inf +Float64 nan +Float64 nan +Float64 nan +FixedString(1) 1 +FixedString(2) 1\0 +FixedString(10) 1\0\0\0\0\0\0\0\0\0 +IPv4 192.168.0.1 +IPv6 ::1 +Int8 -128 +Int8 -128 +Int8 -127 +Int8 -127 +Int8 -1 +Int8 -1 +Int8 0 +Int8 0 +Int8 1 +Int8 1 +Int8 126 +Int8 126 +Int8 127 +Int8 127 +Int16 -32768 +Int16 -32767 +Int16 -1 +Int16 0 +Int16 1 +Int16 32766 +Int16 32767 +Int32 -2147483648 +Int32 -2147483647 +Int32 -1 +Int32 0 +Int32 1 +Int32 2147483646 +Int32 2147483647 +Int64 -9223372036854775808 +Int64 -9223372036854775807 +Int64 -1 +Int64 0 +Int64 1 +Int64 9223372036854775806 +Int64 9223372036854775807 +Int128 -170141183460469231731687303715884105728 +Int128 -170141183460469231731687303715884105727 +Int128 -1 +Int128 0 +Int128 1 +Int128 170141183460469231731687303715884105726 +Int128 170141183460469231731687303715884105727 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819968 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819967 +Int256 -1 +Int256 0 +Int256 1 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819966 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819967 +IntervalDay 1 +IntervalYear 3 +IntervalMonth 2 +LowCardinality(String) 1 +LowCardinality(String) 1 +LowCardinality(UInt16) 0 +MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] +Map(Dynamic, Dynamic) {'11':'v1','22':'1'} +Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] +Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] +Object(\'json\') {"1":"2"} +Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":null,"k1":1,"k2":2} +Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":"foo","k1":null,"k2":null} +Object(Nullable(\'json\')) {"1":2,"2":3,"2020-10-10":null,"k1":null,"k2":null} +Point (1.23,4.5600000000000005) +Ring [(1.23,4.5600000000000005),(2.34,5.67)] +String string +SimpleAggregateFunction(anyLast, Array(Int16)) [1,2] +Tuple(Dynamic) ('') +Tuple(Tuple(Dynamic)) (('')) +Tuple(Tuple(Tuple(Dynamic))) (((''))) +UUID 00000000-0000-0000-0000-000000000000 +UUID dededdb6-7835-4ce4-8d11-b5de6f2820e9 +UInt8 0 +UInt8 1 +UInt8 254 +UInt8 255 +UInt16 0 +UInt16 1 +UInt16 65534 +UInt16 65535 +UInt32 0 +UInt32 1 +UInt32 4294967294 +UInt32 4294967295 +UInt64 0 +UInt64 1 +UInt64 18446744073709551614 +UInt64 18446744073709551615 +UInt128 0 +UInt128 1 +UInt128 340282366920938463463374607431768211454 +UInt128 340282366920938463463374607431768211455 +UInt256 0 +UInt256 1 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 + +50 +50 diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql new file mode 100644 index 00000000000..38d70dee64e --- 
/dev/null +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -0,0 +1,99 @@ +-- Tags: no-random-settings + +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; +SET allow_suspicious_low_cardinality_types=1; + + +CREATE TABLE t (d Dynamic(max_types=255)) ENGINE = Memory; +-- Integer types: signed and unsigned integers (UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256) +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-32768::Int16), (-32767::Int16), (-1::Int16), (0::Int16), (1::Int16), (32766::Int16), (32767::Int16); +INSERT INTO t VALUES (-2147483648::Int32), (-2147483647::Int32), (-1::Int32), (0::Int32), (1::Int32), (2147483646::Int32), (2147483647::Int32); +INSERT INTO t VALUES (-9223372036854775808::Int64), (-9223372036854775807::Int64), (-1::Int64), (0::Int64), (1::Int64), (9223372036854775806::Int64), (9223372036854775807::Int64); +INSERT INTO t VALUES (-170141183460469231731687303715884105728::Int128), (-170141183460469231731687303715884105727::Int128), (-1::Int128), (0::Int128), (1::Int128), (170141183460469231731687303715884105726::Int128), (170141183460469231731687303715884105727::Int128); +INSERT INTO t VALUES (-57896044618658097711785492504343953926634992332820282019728792003956564819968::Int256), (-57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256), (-1::Int256), (0::Int256), (1::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819966::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256); + +INSERT INTO t VALUES (0::UInt8), (1::UInt8), (254::UInt8), (255::UInt8); +INSERT INTO t VALUES (0::UInt16), (1::UInt16), (65534::UInt16), (65535::UInt16); +INSERT INTO t VALUES (0::UInt32), (1::UInt32), (4294967294::UInt32), (4294967295::UInt32); +INSERT INTO t VALUES (0::UInt64), (1::UInt64), (18446744073709551614::UInt64), (18446744073709551615::UInt64); +INSERT INTO t VALUES (0::UInt128), (1::UInt128), (340282366920938463463374607431768211454::UInt128), (340282366920938463463374607431768211455::UInt128); +INSERT INTO t VALUES (0::UInt256), (1::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639934::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639935::UInt256); + +-- Floating-point numbers: floats(Float32 and Float64) and Decimal values +INSERT INTO t VALUES (1.17549435e-38::Float32), (3.40282347e+38::Float32), (-3.40282347e+38::Float32), (-1.17549435e-38::Float32), (1.4e-45::Float32), (-1.4e-45::Float32); +INSERT INTO t VALUES (inf::Float32), (-inf::Float32), (nan::Float32); +INSERT INTO t VALUES (inf::FLOAT(12)), (-inf::FLOAT(12)), (nan::FLOAT(12)); +INSERT INTO t VALUES (inf::FLOAT(15,22)), (-inf::FLOAT(15,22)), (nan::FLOAT(15,22)); + +INSERT INTO t VALUES (1.17549435e-38::Float64), (3.40282347e+38::Float64), (-3.40282347e+38::Float64), (-1.17549435e-38::Float64), (1.4e-45::Float64), (-1.4e-45::Float64); +INSERT INTO t VALUES (2.2250738585072014e-308::Float64), (1.7976931348623157e+308::Float64), (-1.7976931348623157e+308::Float64), (-2.2250738585072014e-308::Float64); +INSERT INTO t VALUES (inf::Float64), (-inf::Float64), (nan::Float64); +INSERT INTO t VALUES (inf::DOUBLE(12)), 
(-inf::DOUBLE(12)), (nan::DOUBLE(12)); +INSERT INTO t VALUES (inf::DOUBLE(15,22)), (-inf::DOUBLE(15,22)), (nan::DOUBLE(15,22)); + +INSERT INTO t VALUES (-99999999.9::Decimal32(1)); +INSERT INTO t VALUES (-999999999.99::Decimal64(2)); +INSERT INTO t VALUES (-999999999.999::Decimal128(3)); +INSERT INTO t VALUES (-999999999.9999::Decimal256(4)); + +-- Strings: String and FixedString +INSERT INTO t VALUES ('string'::String), ('1'::FixedString(1)), ('1'::FixedString(2)), ('1'::FixedString(10)); --(''::String), + +-- Boolean +INSERT INTO t VALUES ('1'::Bool), (0::Bool); + +-- Dates: use Date and Date32 for days, and DateTime and DateTime64 for instances in time +INSERT INTO t VALUES ('2022-01-01'::Date), ('2022-01-01'::Date32), ('2022-01-01 01:01:01'::DateTime), ('2022-01-01 01:01:01.011'::DateTime64); + +-- JSON +INSERT INTO t VALUES ('{"1":"2"}'::JSON); +INSERT INTO t FORMAT JSONEachRow {"d" : {"k1" : 1, "k2" : 2}} {"d" : {"1" : 2, "2" : 3}} {"d" : {"2020-10-10" : "foo"}}; + +-- UUID +INSERT INTO t VALUES ('dededdb6-7835-4ce4-8d11-b5de6f2820e9'::UUID); +INSERT INTO t VALUES ('00000000-0000-0000-0000-000000000000'::UUID); + +-- LowCardinality +INSERT INTO t VALUES ('1'::LowCardinality(String)), ('1'::LowCardinality(String)), (0::LowCardinality(UInt16)); + +-- Arrays +INSERT INTO t VALUES ([]::Array(Dynamic)), ([[]]::Array(Array(Dynamic))), ([[[]]]::Array(Array(Array(Dynamic)))); + +-- Tuple +INSERT INTO t VALUES (()::Tuple(Dynamic)), ((())::Tuple(Tuple(Dynamic))), (((()))::Tuple(Tuple(Tuple(Dynamic)))); + +-- Map. +INSERT INTO t VALUES (map(11::Dynamic, 'v1'::Dynamic, '22'::Dynamic, 1::Dynamic)); + +-- SimpleAggregateFunction +INSERT INTO t VALUES ([1,2]::SimpleAggregateFunction(anyLast, Array(Int16))); + +-- IPs +INSERT INTO t VALUES (toIPv4('192.168.0.1')), (toIPv6('::1')); + +-- Geo +INSERT INTO t VALUES ((1.23, 4.56)::Point), (([(1.23, 4.56)::Point, (2.34, 5.67)::Point])::Ring); +INSERT INTO t VALUES ([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]::MultiPolygon); + +-- Interval +INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' year); + +-- Nested +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, ['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); + +SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d ; + +CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; +INSERT INTO t2 SELECT * FROM t; + +SELECT ''; +SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; + +SELECT ''; +SELECT uniqExact(dynamicType(d)) t_ FROM t; +SELECT uniqExact(dynamicType(d)) t_ FROM t2; diff --git a/tests/queries/0_stateless/03160_dynamic_type_agg.reference b/tests/queries/0_stateless/03160_dynamic_type_agg.reference new file mode 100644 index 00000000000..54f6e428839 --- /dev/null +++ b/tests/queries/0_stateless/03160_dynamic_type_agg.reference @@ -0,0 +1 @@ +4950 4950 diff --git a/tests/queries/0_stateless/03160_dynamic_type_agg.sql b/tests/queries/0_stateless/03160_dynamic_type_agg.sql new file mode 100644 index 00000000000..f99232031a8 --- /dev/null +++ b/tests/queries/0_stateless/03160_dynamic_type_agg.sql @@ -0,0 +1,10 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE t (d Dynamic) ENGINE = Memory; + +INSERT INTO t SELECT 
sumState(number) AS d FROM numbers(100); + +SELECT finalizeAggregation(d.`AggregateFunction(sum, UInt64)`), + sumMerge(d.`AggregateFunction(sum, UInt64)`) +FROM t GROUP BY d.`AggregateFunction(sum, UInt64)`; + diff --git a/tests/queries/0_stateless/03162_dynamic_type_nested.reference b/tests/queries/0_stateless/03162_dynamic_type_nested.reference new file mode 100644 index 00000000000..8d5bcb5f85a --- /dev/null +++ b/tests/queries/0_stateless/03162_dynamic_type_nested.reference @@ -0,0 +1,4 @@ + ┌─dynamicType(d)──────────────┬─d─────────────────────────────────────────┬─d.Nested(x UInt32, y Dynamic).x─┬─d.Nested(x UInt32, y Dynamic).y───┬─dynamicType(arrayElement(d.Nested(x UInt32, y Dynamic).y, 1))─┬─d.Nested(x UInt32, y Dynamic).y.String─┬─d.Nested(x UInt32, y Dynamic).y.Tuple(Int64, Array(String))─┐ +1. │ Nested(x UInt32, y Dynamic) │ [(1,'aa'),(2,'bb')] │ [1,2] │ ['aa','bb'] │ String │ ['aa','bb'] │ [(0,[]),(0,[])] │ +2. │ Nested(x UInt32, y Dynamic) │ [(1,(2,['aa','bb'])),(5,(6,['ee','ff']))] │ [1,5] │ [(2,['aa','bb']),(6,['ee','ff'])] │ Tuple(Int64, Array(String)) │ [NULL,NULL] │ [(2,['aa','bb']),(6,['ee','ff'])] │ + └─────────────────────────────┴───────────────────────────────────────────┴─────────────────────────────────┴───────────────────────────────────┴───────────────────────────────────────────────────────────────┴────────────────────────────────────────┴─────────────────────────────────────────────────────────────┘ diff --git a/tests/queries/0_stateless/03162_dynamic_type_nested.sql b/tests/queries/0_stateless/03162_dynamic_type_nested.sql new file mode 100644 index 00000000000..94007459a9e --- /dev/null +++ b/tests/queries/0_stateless/03162_dynamic_type_nested.sql @@ -0,0 +1,16 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE t (d Dynamic) ENGINE = Memory; + +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y Dynamic)) ; +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb'])), (5, (6, ['ee', 'ff']))]::Nested(x UInt32, y Dynamic)); + +SELECT dynamicType(d), + d, + d.`Nested(x UInt32, y Dynamic)`.x, + d.`Nested(x UInt32, y Dynamic)`.y, + dynamicType(d.`Nested(x UInt32, y Dynamic)`.y[1]), + d.`Nested(x UInt32, y Dynamic)`.y.`String`, + d.`Nested(x UInt32, y Dynamic)`.y.`Tuple(Int64, Array(String))` +FROM t ORDER BY d +FORMAT PrettyCompactMonoBlock; From 4829db4d9e80a02eca4b08779bd645bcd3ed5ba7 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 16 May 2024 14:51:22 +0200 Subject: [PATCH 295/651] Add Dynamic type in fuzzer tests --- tests/fuzz/dictionaries/datatypes.dict | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fuzz/dictionaries/datatypes.dict b/tests/fuzz/dictionaries/datatypes.dict index 232e89db0c0..a01a94fd3e3 100644 --- a/tests/fuzz/dictionaries/datatypes.dict +++ b/tests/fuzz/dictionaries/datatypes.dict @@ -132,3 +132,4 @@ "YEAR" "bool" "boolean" +"Dynamic" From 73504a048bdc8076b079fcbe93578229348ef761 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 16 May 2024 14:51:57 +0200 Subject: [PATCH 296/651] Fix doc --- docs/en/sql-reference/data-types/dynamic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index a2c8ba532ce..eabf032c52f 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -355,7 +355,7 @@ SELECT * FROM test WHERE d2 == [1,2,3]::Array(UInt32)::Dynamic; - Compare `Dynamic` subcolumn with required type: ```sql -SELECT * FROM test WHERE d2.`Array(Int65)` 
== [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') +SELECT * FROM test WHERE d2.`Array(Int64)` == [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') ``` ```text From bb130f429e09b20d74f4df550fc096bd68262a14 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 16 May 2024 12:40:44 +0000 Subject: [PATCH 297/651] fix reading of columns of type Tuple(Map(LowCardinality(...))) --- .../SerializationLowCardinality.cpp | 9 ++++- .../03156_tuple_map_low_cardinality.reference | 6 ++++ .../03156_tuple_map_low_cardinality.sql | 33 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference create mode 100644 tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 2d2be195098..18d6e48623b 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -515,8 +515,14 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( size_t limit, DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, - SubstreamsCache * /* cache */) const + SubstreamsCache * cache) const { + if (auto cached_column = getFromSubstreamsCache(cache, settings.path)) + { + column = cached_column; + return; + } + auto mutable_column = column->assumeMutable(); ColumnLowCardinality & low_cardinality_column = typeid_cast(*mutable_column); @@ -670,6 +676,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( } column = std::move(mutable_column); + addToSubstreamsCache(cache, settings.path, column); } void SerializationLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference new file mode 100644 index 00000000000..5b2a36927ee --- /dev/null +++ b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference @@ -0,0 +1,6 @@ +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql new file mode 100644 index 00000000000..836b426a9a9 --- /dev/null +++ b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS t_map_lc; + +CREATE TABLE t_map_lc +( + id UInt64, + t Tuple(m Map(LowCardinality(String), LowCardinality(String))) +) +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_map_lc SELECT * FROM generateRandom('id UInt64, t Tuple(m Map(LowCardinality(String), LowCardinality(String)))') LIMIT 100000; + +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, mapKeys(t.m)); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.keys); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.values); +SELECT * FROM t_map_lc WHERE mapContains(t.m, 'not_existing_key_1337'); + +DROP TABLE t_map_lc; + +CREATE TABLE t_map_lc +( + id UInt64, + t Tuple(m Map(LowCardinality(String), LowCardinality(String))) +) +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = '10G'; + +INSERT INTO t_map_lc SELECT * FROM generateRandom('id UInt64, t Tuple(m Map(LowCardinality(String), LowCardinality(String)))') LIMIT 100000; + +SELECT count(), 
FROM t_map_lc WHERE NOT ignore(*, mapKeys(t.m)); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.keys); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.values); +SELECT * FROM t_map_lc WHERE mapContains(t.m, 'not_existing_key_1337'); + +DROP TABLE t_map_lc; From d10bf725f030d9a2fd18b0dd87be409c22461eb5 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 16 May 2024 14:57:22 +0200 Subject: [PATCH 298/651] ci config update to enable job --- tests/ci/ci_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index dc67e05455c..60ad6933afc 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1260,8 +1260,7 @@ CI_CONFIG = CIConfig( ), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - # add [run_by_label="test arm"] to not run in regular pr workflow by default - job_config=JobConfig(num_batches=6, **integration_test_common_params, run_by_label="test arm"), # type: ignore + job_config=JobConfig(num_batches=5, **integration_test_common_params), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( Build.PACKAGE_RELEASE, From 20b0a208bfdddd68f04c18ff74b3e2d4c99e2e2d Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 16 May 2024 15:04:13 +0200 Subject: [PATCH 299/651] Add proportionsZTest to docs --- .../functions/other-functions.md | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 2b0215115cb..64f823d0656 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -903,6 +903,52 @@ SELECT parseTimeDelta('1yr2mo') └──────────────────────────┘ ``` +## proportionsZTest + +Returns test statistics for the two proportion Z-test - a statistical test for comparing the proportions from two populations `x` and `y`. + +**Syntax** + +```sql +proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_type) +``` + +**Arguments** + +- `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). +- `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). +- `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). +- `trials_y`: NUmber of trials in population `y`. [UInt64](../data-types/int-uint.md). +- `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). +- `pool_type`: Selection of pooling (way in which the standard error is estimated). can be either `unpooled` or `pooled`. [String](../data-types/string.md). + +:::note +For argument `pool_type`: In the pooled version, the two proportions are averaged, and only one proportion is used to estimate the standard error. In the unpooled version, the two proportions are used separately. +::: + +**Returned value** + +- `z_stat`: Z statistic. [Float64](../data-types/float.md). +- `p_val`: P value. [Float64](../data-types/float.md). +- `ci_low`: The lower confidence interval. [Float64](../data-types/float.md). +- `ci_high`: The upper confidence interval. [Float64](../data-types/float.md). 
+
+**Example**
+
+Query:
+
+```sql
+SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled');
+```
+
+Result:
+
+```response
+┌─proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')───────────────────────────────┐
│ (-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) │
└────────────────────────────────────────────────────────────────────────────────────┘
+```
+
 ## least(a, b)

 Returns the smaller value of a and b.
From 0b07f2236c07b14f853c6e78844690c064617c09 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Thu, 16 May 2024 13:42:49 +0000
Subject: [PATCH 300/651] add test

---
 .../03155_in_nested_subselects.reference       |  4 ++++
 .../03155_in_nested_subselects.sql             | 19 +++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 tests/queries/0_stateless/03155_in_nested_subselects.reference
 create mode 100644 tests/queries/0_stateless/03155_in_nested_subselects.sql

diff --git a/tests/queries/0_stateless/03155_in_nested_subselects.reference b/tests/queries/0_stateless/03155_in_nested_subselects.reference
new file mode 100644
index 00000000000..5565ed6787f
--- /dev/null
+++ b/tests/queries/0_stateless/03155_in_nested_subselects.reference
@@ -0,0 +1,4 @@
+0
+1
+0
+1
diff --git a/tests/queries/0_stateless/03155_in_nested_subselects.sql b/tests/queries/0_stateless/03155_in_nested_subselects.sql
new file mode 100644
index 00000000000..4f5ccd30aa3
--- /dev/null
+++ b/tests/queries/0_stateless/03155_in_nested_subselects.sql
@@ -0,0 +1,19 @@
+-- https://github.com/ClickHouse/ClickHouse/issues/63833
+SET allow_experimental_analyzer = 1;
+
+create table Example (id Int32) engine = MergeTree ORDER BY id;
+INSERT INTO Example SELECT number AS id FROM numbers(2);
+
+create table Null engine=Null as Example ;
+--create table Null engine=MergeTree order by id as Example ;
+
+create materialized view Transform to Example as
+select * from Null
+join ( select * FROM Example
+       WHERE id IN (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM Null)))))
+     ) as old
+using id;
+
+INSERT INTO Null SELECT number AS id FROM numbers(2);
+
+select * from Example; -- should return 4 rows
From 3ff2ec0a7d8d3006ccf90250cb95b6ac7c1e872e Mon Sep 17 00:00:00 2001
From: kssenii
Date: Thu, 16 May 2024 15:58:27 +0200
Subject: [PATCH 301/651] Fix segfault

---
 src/Storages/ObjectStorage/StorageObjectStorageSource.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
index 9c67a125f5e..abaf51edc4e 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
@@ -62,7 +62,7 @@ protected:
     const std::optional format_settings;
     const UInt64 max_block_size;
     const bool need_only_count;
-    const ReadFromFormatInfo & read_from_format_info;
+    const ReadFromFormatInfo read_from_format_info;
     const std::shared_ptr create_reader_pool;
     ColumnsDescription columns_desc;
From 17aa7991016875df603bec8495e17d3c1dbb7d3a Mon Sep 17 00:00:00 2001
From: Max K
Date: Thu, 16 May 2024 16:43:54 +0200
Subject: [PATCH 302/651] CI: aarch64: disable kerberos tests

---
 tests/ci/ci_config.py | 2 +-
 tests/integration/test_storage_kerberized_kafka/test.py | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 60ad6933afc..4761b5b450f 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -1260,7 +1260,7 @@ CI_CONFIG = CIConfig(
), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=5, **integration_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( Build.PACKAGE_RELEASE, diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index 451e1ab2ccf..24d10d7ff83 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -5,7 +5,7 @@ import time import pytest import logging -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, is_arm from helpers.test_tools import TSV from helpers.client import QueryRuntimeException @@ -18,6 +18,10 @@ from kafka.protocol.admin import DescribeGroupsResponse_v1, DescribeGroupsReques from kafka.protocol.group import MemberAssignment import socket +if is_arm(): + # skip due to no arm support for clickhouse/kerberos-kdc docker image + pytestmark = pytest.mark.skip + cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", From 93601066ea74a11da2dffedf6289e442997afaf9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 16 May 2024 14:54:21 +0000 Subject: [PATCH 303/651] Automatic style fix --- tests/ci/ci.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 3ed584f5d93..9c2ded20cff 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -968,10 +968,7 @@ class CiOptions: for job in jobs_to_do[:]: job_param = jobs_params[job] - if ( - job_param["run_by_ci_option"] - and job not in jobs_to_do_requested - ): + if job_param["run_by_ci_option"] and job not in jobs_to_do_requested: print( f"Erasing job '{job}' from list because it's not in included set, but will run only by include" ) @@ -1445,8 +1442,7 @@ def _configure_jobs( jobs_params[job] = { "batches": batches_to_do, "num_batches": num_batches, - "run_by_ci_option": job_config.run_by_ci_option - and pr_info.is_pr, + "run_by_ci_option": job_config.run_by_ci_option and pr_info.is_pr, } elif add_to_skip: # treat job as being skipped only if it's controlled by digest From d8941873ec0fca6b4a2f6f27e2b095d46ac75753 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 16 May 2024 17:38:15 +0200 Subject: [PATCH 304/651] Fix typo --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 64f823d0656..288432167bb 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -918,7 +918,7 @@ proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_ - `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). - `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). - `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). -- `trials_y`: NUmber of trials in population `y`. [UInt64](../data-types/int-uint.md). +- `trials_y`: Number of trials in population `y`. [UInt64](../data-types/int-uint.md). - `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). 
- `pool_type`: Selection of pooling (way in which the standard error is estimated). can be either `unpooled` or `pooled`. [String](../data-types/string.md). From 9f70cb7cbfea827dcd2458beb5545608d14a5f02 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 16 May 2024 17:39:18 +0200 Subject: [PATCH 305/651] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a69ca0fb644..bea838c1269 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -759,6 +759,7 @@ Promtail Protobuf ProtobufSingle ProxySQL +proportionsZTest Punycode PyArrow PyCharm @@ -2753,6 +2754,7 @@ unixODBC unixodbc unoptimized unparsed +unpooled unrealiable unreplicated unresolvable From bdfabc1d40d5f83d6b88a81b6322dbe3265ac2a1 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 16 May 2024 15:44:19 +0000 Subject: [PATCH 306/651] better comment --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 5359ac8b97e..7091f16b516 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5616,7 +5616,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi if (StoragePtr storage = scope.context->getViewSource()) { QueryTreeNodePtr table_expression; - /// Don't even ask... there's turtles all the way down... + /// Process possibly nested sub-selects for (auto * query_node = in_second_argument->as(); query_node; query_node = table_expression->as()) table_expression = extractLeftTableExpression(query_node->getJoinTree()); From 49a52cdb3f0bb1f105ea9814bdaedb97f8d23249 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 5 Mar 2024 11:31:12 +0100 Subject: [PATCH 307/651] Remove http_max_chunk_size setting (too internal) Signed-off-by: Azat Khuzhin --- programs/keeper/Keeper.cpp | 5 ----- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/Context.h | 5 ----- src/Server/HTTP/HTTPContext.h | 1 - src/Server/HTTP/HTTPServerRequest.cpp | 4 +++- 6 files changed, 5 insertions(+), 13 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a558ed64bf9..267b725b02b 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -249,11 +249,6 @@ struct KeeperHTTPContext : public IHTTPContext return context->getConfigRef().getUInt64("keeper_server.http_max_field_value_size", 128 * 1024); } - uint64_t getMaxChunkSize() const override - { - return context->getConfigRef().getUInt64("keeper_server.http_max_chunk_size", 100_GiB); - } - Poco::Timespan getReceiveTimeout() const override { return {context->getConfigRef().getInt64("keeper_server.http_receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0}; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 05a49029ff8..919a2c26211 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -362,7 +362,6 @@ class IColumn; M(UInt64, http_max_fields, 1000000, "Maximum number of fields in HTTP header", 0) \ M(UInt64, http_max_field_name_size, 128 * 1024, "Maximum length of field name in HTTP header", 0) \ M(UInt64, http_max_field_value_size, 128 * 1024, "Maximum length of field value in HTTP 
header", 0) \ - M(UInt64, http_max_chunk_size, 100_GiB, "Maximum value of a chunk size in HTTP chunked transfer encoding", 0) \ M(Bool, http_skip_not_found_url_for_globs, true, "Skip url's for globs with HTTP_NOT_FOUND error", 0) \ M(Bool, http_make_head_request, true, "Allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size", 0) \ M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \ @@ -985,6 +984,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, allow_experimental_s3queue, true) \ MAKE_OBSOLETE(M, Bool, query_plan_optimize_primary_key, true) \ MAKE_OBSOLETE(M, Bool, optimize_monotonous_functions_in_order_by, false) \ + MAKE_OBSOLETE(M, UInt64, http_max_chunk_size, 100_GiB) \ /** The section above is for obsolete settings. Do not add anything there. */ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 5f3e9ffb611..11ebf0d2eed 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -90,6 +90,7 @@ static std::map sett {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, + {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 0, 1, "Applies better rendering for multiline strings."}, diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index a6ab503430b..3b420b9ac1b 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1387,11 +1387,6 @@ struct HTTPContext : public IHTTPContext return context->getSettingsRef().http_max_field_value_size; } - uint64_t getMaxChunkSize() const override - { - return context->getSettingsRef().http_max_chunk_size; - } - Poco::Timespan getReceiveTimeout() const override { return context->getSettingsRef().http_receive_timeout; diff --git a/src/Server/HTTP/HTTPContext.h b/src/Server/HTTP/HTTPContext.h index 09c46ed188c..86054827632 100644 --- a/src/Server/HTTP/HTTPContext.h +++ b/src/Server/HTTP/HTTPContext.h @@ -12,7 +12,6 @@ struct IHTTPContext virtual uint64_t getMaxFields() const = 0; virtual uint64_t getMaxFieldNameSize() const = 0; virtual uint64_t getMaxFieldValueSize() const = 0; - virtual uint64_t getMaxChunkSize() const = 0; virtual Poco::Timespan getReceiveTimeout() const = 0; virtual Poco::Timespan getSendTimeout() const = 0; diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index 9db02eac220..3e82ec82550 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -20,6 +20,8 @@ #include #endif +static constexpr UInt64 HTTP_MAX_CHUNK_SIZE = 100ULL << 30; + namespace DB { HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session, const ProfileEvents::Event & read_event) @@ -54,7 +56,7 @@ HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse /// and retry with exactly the same (incomplete) set of 
rows. /// That's why we have to check body size if it's provided. if (getChunkedTransferEncoding()) - stream = std::make_unique(std::move(in), context->getMaxChunkSize()); + stream = std::make_unique(std::move(in), HTTP_MAX_CHUNK_SIZE); else if (hasContentLength()) { size_t content_length = getContentLength(); From e4eaf256b1746420ef359deef1af788eab02f0d8 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 16 May 2024 16:33:41 +0200 Subject: [PATCH 308/651] Analyzer: Fix COLUMNS resolve --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 30 +++++++++++++++++-- .../03152_analyzer_columns_list.reference | 1 + .../03152_analyzer_columns_list.sql | 1 + 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03152_analyzer_columns_list.reference create mode 100644 tests/queries/0_stateless/03152_analyzer_columns_list.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f0a3a2c74b6..dad1b41c7af 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -4608,6 +4608,34 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( std::unordered_set table_expression_column_names_to_skip; + QueryTreeNodesWithNames result; + + if (matcher_node_typed.getMatcherType() == MatcherNodeType::COLUMNS_LIST) + { + auto identifiers = matcher_node_typed.getColumnsIdentifiers(); + result.reserve(identifiers.size()); + + for (const auto & identifier : identifiers) + { + auto resolve_result = tryResolveIdentifier(IdentifierLookup{identifier, IdentifierLookupContext::EXPRESSION}, scope); + if (!resolve_result.isResolved()) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown identifier '{}' inside COLUMNS matcher. In scope {}", + identifier.getFullName(), scope.dump()); + + // TODO: Introduce IdentifierLookupContext::COLUMN and get read of this check + auto * resolved_column = resolve_result.resolved_identifier->as(); + if (!resolved_column) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Identifier '{}' inside COLUMNS matcher must resolve into a column, but got {}. 
In scope {}", + identifier.getFullName(), resolve_result.resolved_identifier->getNodeTypeName(), scope.dump()); + result.emplace_back(resolve_result.resolved_identifier, resolved_column->getColumnName()); + } + return result; + } + + result.resize(matcher_node_typed.getColumnsIdentifiers().size()); + for (auto & table_expression : table_expressions_stack) { bool table_expression_in_resolve_process = nearest_query_scope->table_expressions_in_resolve_process.contains(table_expression.get()); @@ -4775,8 +4803,6 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( table_expressions_column_nodes_with_names_stack.push_back(std::move(matched_column_nodes_with_names)); } - QueryTreeNodesWithNames result; - for (auto & table_expression_column_nodes_with_names : table_expressions_column_nodes_with_names_stack) { for (auto && table_expression_column_node_with_name : table_expression_column_nodes_with_names) diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.reference b/tests/queries/0_stateless/03152_analyzer_columns_list.reference new file mode 100644 index 00000000000..eefa8ebd513 --- /dev/null +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.reference @@ -0,0 +1 @@ +11323 8 diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.sql b/tests/queries/0_stateless/03152_analyzer_columns_list.sql new file mode 100644 index 00000000000..5a7e3e9696e --- /dev/null +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.sql @@ -0,0 +1 @@ +SELECT COLUMNS(license_text, library_name) APPLY (length) FROM system.licenses ORDER BY library_name LIMIT 1; From d5b690339309ba5082e20af294dcabf5ec306a7c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 16 May 2024 16:49:28 +0200 Subject: [PATCH 309/651] Cleanup and add test --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 6 ++++-- tests/queries/0_stateless/03152_analyzer_columns_list.sql | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index dad1b41c7af..dfc5ebb3532 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -4623,12 +4623,14 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( "Unknown identifier '{}' inside COLUMNS matcher. In scope {}", identifier.getFullName(), scope.dump()); - // TODO: Introduce IdentifierLookupContext::COLUMN and get read of this check + // TODO: Introduce IdentifierLookupContext::COLUMN and get rid of this check auto * resolved_column = resolve_result.resolved_identifier->as(); if (!resolved_column) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Identifier '{}' inside COLUMNS matcher must resolve into a column, but got {}. 
In scope {}", - identifier.getFullName(), resolve_result.resolved_identifier->getNodeTypeName(), scope.dump()); + identifier.getFullName(), + resolve_result.resolved_identifier->getNodeTypeName(), + scope.scope_node->formatASTForErrorMessage()); result.emplace_back(resolve_result.resolved_identifier, resolved_column->getColumnName()); } return result; diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.sql b/tests/queries/0_stateless/03152_analyzer_columns_list.sql index 5a7e3e9696e..2b19cdf37a2 100644 --- a/tests/queries/0_stateless/03152_analyzer_columns_list.sql +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.sql @@ -1 +1,3 @@ SELECT COLUMNS(license_text, library_name) APPLY (length) FROM system.licenses ORDER BY library_name LIMIT 1; + +SELECT COLUMNS(license_text, library_name, xyz) APPLY (length) FROM system.licenses; -- { serverError UNKNOWN_IDENTIFIER } From 2f6a4e2a4f3dc0c116b2421cbd18db6ef68cd08f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 16 May 2024 16:55:51 +0000 Subject: [PATCH 310/651] WithMergeableStateAfterAggregation is not a good idea. --- src/Planner/PlannerExpressionAnalysis.cpp | 2 +- .../0_stateless/01757_optimize_skip_unused_shards_limit.sql | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index ad8db83d66c..6ad10d91832 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -545,7 +545,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo * otherwise coordinator does not find it in block. */ NameSet required_output_nodes_names; - if (sort_analysis_result_optional.has_value() && !planner_query_processing_info.isSecondStage()) + if (sort_analysis_result_optional.has_value() && planner_query_processing_info.isFirstStage() && planner_query_processing_info.getToStage() != QueryProcessingStage::Complete) { const auto & before_order_by_actions = sort_analysis_result_optional->before_order_by_actions; for (const auto & output_node : before_order_by_actions->getOutputs()) diff --git a/tests/queries/0_stateless/01757_optimize_skip_unused_shards_limit.sql b/tests/queries/0_stateless/01757_optimize_skip_unused_shards_limit.sql index 22590a20223..3f97b912105 100644 --- a/tests/queries/0_stateless/01757_optimize_skip_unused_shards_limit.sql +++ b/tests/queries/0_stateless/01757_optimize_skip_unused_shards_limit.sql @@ -34,3 +34,6 @@ select * from dist_01757 where dummy in (0, 1) settings optimize_skip_unused_sha select * from dist_01757 where dummy in (0, 1) settings optimize_skip_unused_shards_limit=9223372036854775808; -- { serverError 69 } drop table dist_01757; + +-- fuzzed +SELECT * FROM remote('127.0.0.{1,2}', numbers(40), number) ORDER BY 'a' LIMIT 1 BY number SETTINGS optimize_skip_unused_shards = 1, force_optimize_skip_unused_shards=0 format Null From fd322df568d115afe9d2d9564aa7d64f8b3bc114 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 16 May 2024 10:24:56 -0700 Subject: [PATCH 311/651] fix_client_abort: Resolve Client Abortion Issue When Using KQL Table Function in Interactive Mode --- src/Parsers/Kusto/ParserKQLStatement.cpp | 83 ++++++++++++------------ 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index fbf2110e664..e508b69bdff 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -7,6 
+7,7 @@ #include #include #include +#include namespace DB { @@ -62,49 +63,51 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery kql_p; - ASTPtr select; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + ParserToken lparen(TokenType::OpeningRoundBracket); - auto begin = pos; - auto paren_count = 0; + ASTPtr string_literal; + ParserStringLiteral parser_string_literal; + + if (!lparen.ignore(pos, expected)) + return false; + + size_t paren_count = 0; String kql_statement; - - if (s_lparen.ignore(pos, expected)) + if (parser_string_literal.parse(pos, string_literal, expected)) { - if (pos->type == TokenType::HereDoc) - { - kql_statement = String(pos->begin + 2, pos->end - 2); - } - else - { - ++paren_count; - auto pos_start = pos; - while (isValidKQLPos(pos)) - { - if (pos->type == TokenType::ClosingRoundBracket) - --paren_count; - if (pos->type == TokenType::OpeningRoundBracket) - ++paren_count; - - if (paren_count == 0) - break; - ++pos; - } - kql_statement = String(pos_start->begin, (--pos)->end); - } - ++pos; - Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); - - if (kql_p.parse(pos_kql, select, expected)) - { - node = select; - ++pos; - return true; - } + kql_statement = typeid_cast(*string_literal).value.safeGet(); } - pos = begin; - return false; + else + { + ++paren_count; + auto pos_start = pos; + while (isValidKQLPos(pos)) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + ++pos; + } + if (!isValidKQLPos(pos)) + { + return false; + } + --pos; + kql_statement = String(pos_start->begin, pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.data(), kql_statement.data() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); + Expected kql_expected; + kql_expected.enable_highlighting = false; + if (!ParserKQLWithUnionQuery().parse(pos_kql, node, kql_expected)) + return false; + ++pos; + return true; } } From b82eeeee88b521f5a4beb4a20006a452f0c0bb35 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 16 May 2024 17:43:59 +0000 Subject: [PATCH 312/651] Check what would be broken if do not add all the identifiers to functions map. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 52efee03ae4..d83b1b847bf 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1039,10 +1039,6 @@ private: auto [_, inserted] = scope.alias_name_to_expression_node.insert(std::make_pair(alias, node)); if (!inserted) scope.nodes_with_duplicated_aliases.insert(node); - - /// If node is identifier put it also in scope alias name to lambda node map - if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER) - scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); } IdentifierResolveScope & scope; From d34e06ce4275f9db0767137422e7caab4076fa9a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 16 May 2024 17:55:07 +0000 Subject: [PATCH 313/651] Add a comment. 
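The comment explains why lambda action names are now derived from the lambda argument names and types plus the action name of the expression, instead of from the query tree hash. As a rough illustration (the rendered name below is hypothetical, not something this patch prints), consider a query with a lambda:

```sql
-- The lambda `x -> x + 1` must get the same action name on the initiator
-- and on every shard, otherwise a distributed query breaks.
SELECT arrayMap(x -> x + 1, [1, 2, 3]);
```

With the old scheme the name was `__lambda_<tree_hash>`, and the hash could differ between nodes (e.g. if the default database differs); with the new scheme the name is built from an argument signature like `(x UInt8)` plus the action name of `plus(x, 1)`, which stays stable as long as action names for functions are stable.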
--- src/Planner/PlannerActionsVisitor.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index c2ab1a99910..a88c74d460b 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -243,6 +243,14 @@ public: } case QueryTreeNodeType::LAMBDA: { + /// Initially, the action name was `"__lambda_" + toString(node->getTreeHash());`. + /// This is not a good idea because: + /// * the hash is different on the initiator and the shards if the default database is changed in the cluster + /// * the hash is reliable only within one node; any change will break queries between versions + /// + /// Now, we calculate the execution name as (names + types) for lambda arguments + the action name (expression), + /// and this should be more reliable (as long as we trust the calculation of action names for functions) + WriteBufferFromOwnString buffer; const auto & lambda_node = node->as(); From 9d8baa894af26d10b9b6c2699b379424ae198db1 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Thu, 16 May 2024 11:59:37 -0700 Subject: [PATCH 314/651] [Docs] Incorrect sample syntax for cutToFirstSignificantSubdomainCustom(URL, TLD) --- docs/en/sql-reference/functions/url-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index a0b0170721c..cf2940d63e1 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -152,7 +152,7 @@ Configuration example: **Syntax** ``` sql -cutToFirstSignificantSubdomain(URL, TLD) +cutToFirstSignificantSubdomainCustom(URL, TLD) ``` **Arguments** From 3fe9255d74d3b274e530208b7f2a76927f6b5728 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 16 May 2024 19:19:51 +0000 Subject: [PATCH 315/651] Fix test flakiness ---
ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ, .data = {} }; - SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} + explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} }; struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.reference b/tests/queries/0_stateless/03037_dynamic_merges_1.reference deleted file mode 100644 index 0a647b41c4b..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1.reference +++ /dev/null @@ -1,120 +0,0 @@ -MergeTree compact + horizontal merge -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree wide + horizontal merge -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree compact + vertical merge -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree wide + vertical merge -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference new file mode 100644 index 00000000000..59297e46330 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference @@ -0,0 +1,60 @@ +MergeTree compact +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 
Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh new file mode 100755 index 00000000000..0d3cd45666a --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 " + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" + $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" + $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, 
vertical_merge_algorithm_min_columns_to_activate=10;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference new file mode 100644 index 00000000000..59297e46330 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference @@ -0,0 +1,60 @@ +MergeTree compact +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh similarity index 79% rename from tests/queries/0_stateless/03037_dynamic_merges_1.sh rename to tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh index 056f6702727..b2c40668228 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh @@ -7,8 +7,8 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 " function test() { @@ -40,23 +40,12 @@ function test() $CH_CLIENT -q "drop table if exists test;" -echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;" -test -$CH_CLIENT -q "drop table test;" - - -echo "MergeTree compact + vertical merge" +echo "MergeTree compact" $CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" test $CH_CLIENT -q "drop table test;" -echo "MergeTree wide + vertical merge" +echo "MergeTree wide" $CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" test $CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh index 198c6ca93ff..0941f2da369 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --optimize_aggregation_in_order 0" function test() @@ -53,10 +53,10 @@ function test() $CH_CLIENT -q "drop table if exists test;" echo "MergeTree compact + horizontal merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" echo "MergeTree wide + horizontal merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" echo "MergeTree compact + vertical merge" test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index 1f2a6a31ad7..7a73be20a4d 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=1" function run() { From 847a184ace74533145129aa68efa92283ef0fc52 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 16 May 2024 20:58:45 +0100 Subject: [PATCH 316/651] fix --- .../MergeTree/ParallelReplicasReadingCoordinator.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index ec849b65c1a..c604cf253af 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -1063,10 +1063,7 @@ ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t re { } -ParallelReplicasReadingCoordinator::~ParallelReplicasReadingCoordinator() -{ - chassert(pimpl); -} +ParallelReplicasReadingCoordinator::~ParallelReplicasReadingCoordinator() = default; void ParallelReplicasReadingCoordinator::setProgressCallback(ProgressCallback callback) { From cd73164005de04b1c198ec810ed75926c72f9074 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 16 May 2024 21:08:29 +0000 Subject: [PATCH 317/651] slightly better setting force_optimize_projection_name --- src/Interpreters/executeQuery.cpp | 28 --------------- .../QueryPlan/Optimizations/Optimizations.h | 7 ++-- .../QueryPlanOptimizationSettings.cpp | 2 +- .../QueryPlan/Optimizations/optimizeTree.cpp | 24 ++++++++----- .../optimizeUseAggregateProjection.cpp | 35 ++++++------------- .../optimizeUseNormalProjection.cpp | 19 +++++----- ...6_force_optimize_projection_name.reference | 2 ++ .../02906_force_optimize_projection_name.sql | 15 ++++++++ 8 files changed, 59 insertions(+), 73 
deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 33a4cf2a74c..67797f7d4f6 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1254,34 +1254,6 @@ static std::tuple executeQueryImpl( } } } - // Here we check if our our projections contain force_optimize_projection_name - if (!settings.force_optimize_projection_name.value.empty()) - { - bool found = false; - std::set projections; - { - const auto & access_info = context->getQueryAccessInfo(); - std::lock_guard lock(access_info.mutex); - projections = access_info.projections; - } - - for (const auto &projection : projections) - { - // projection value has structure like: .. - // We need to get only the projection name - size_t last_dot_pos = projection.find_last_of('.'); - std::string projection_name = (last_dot_pos != std::string::npos) ? projection.substr(last_dot_pos + 1) : projection; - if (settings.force_optimize_projection_name.value == projection_name) - { - found = true; - break; - } - } - - if (!found) - throw Exception(ErrorCodes::INCORRECT_DATA, "Projection {} is specified in setting force_optimize_projection_name but not used", - settings.force_optimize_projection_name.value); - } if (process_list_entry) { diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 18f1496d26a..b33a373a970 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -111,8 +111,11 @@ void optimizePrimaryKeyCondition(const Stack & stack); void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes); void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes); void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &); -bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections); -bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes); + +/// Returns the name of the used projection, or nullopt if no projection is used. +std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections); +std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes); + bool addPlansForSets(QueryPlan & plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes); /// Enable memory bound merging of aggregation states for remote queries diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 80923159ddc..2738de1ff5f 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -46,7 +46,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.optimize_projection = from.optimize_use_projections; settings.force_use_projection = settings.optimize_projection && from.force_optimize_projection; - settings.force_projection_name = from.force_optimize_projection_name; + settings.force_projection_name = settings.optimize_projection ?
from.force_optimize_projection_name.value : ""; settings.optimize_use_implicit_projections = settings.optimize_projection && from.optimize_use_implicit_projections; return settings; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 915e664ea8f..4f512016c6b 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -106,7 +106,7 @@ void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, Query void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes) { const size_t max_optimizations_to_apply = optimization_settings.max_optimizations_to_apply; - size_t num_applied_projection = 0; + std::unordered_set applied_projection_names; bool has_reading_from_mt = false; Stack stack; @@ -159,9 +159,11 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s /// Projection optimization relies on PK optimization if (optimization_settings.optimize_projection) - num_applied_projection - += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); - + { + auto applied_projection = optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); + if (applied_projection) + applied_projection_names.insert(*applied_projection); + } if (optimization_settings.aggregation_in_order) optimizeAggregationInOrder(*frame.node, nodes); @@ -180,11 +182,11 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.optimize_projection) { /// Projection optimization relies on PK optimization - if (optimizeUseNormalProjections(stack, nodes)) + if (auto applied_projection = optimizeUseNormalProjections(stack, nodes)) { - ++num_applied_projection; + applied_projection_names.insert(*applied_projection); - if (max_optimizations_to_apply && max_optimizations_to_apply < num_applied_projection) + if (max_optimizations_to_apply && max_optimizations_to_apply < applied_projection_names.size()) throw Exception(ErrorCodes::TOO_MANY_QUERY_PLAN_OPTIMIZATIONS, "Too many projection optimizations applied to query plan. 
Current limit {}", max_optimizations_to_apply); @@ -201,10 +203,16 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s stack.pop_back(); } - if (optimization_settings.force_use_projection && has_reading_from_mt && num_applied_projection == 0) + if (optimization_settings.force_use_projection && has_reading_from_mt && applied_projection_names.empty()) throw Exception( ErrorCodes::PROJECTION_NOT_USED, "No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1"); + + if (!optimization_settings.force_projection_name.empty() && has_reading_from_mt && !applied_projection_names.contains(optimization_settings.force_projection_name)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Projection {} is specified in setting force_optimize_projection_name but not used", + optimization_settings.force_projection_name); } void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 30ff9970790..4017670ad14 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -552,28 +552,28 @@ static QueryPlan::Node * findReadingStep(QueryPlan::Node & node) return nullptr; } -bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections) +std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections) { if (node.children.size() != 1) - return false; + return {}; auto * aggregating = typeid_cast(node.step.get()); if (!aggregating) - return false; + return {}; if (!aggregating->canUseProjection()) - return false; + return {}; QueryPlan::Node * reading_node = findReadingStep(*node.children.front()); if (!reading_node) - return false; + return {}; auto * reading = typeid_cast(reading_node->step.get()); if (!reading) - return false; + return {}; if (!canUseProjectionForReadingStep(reading)) - return false; + return {}; std::shared_ptr max_added_blocks = getMaxAddedBlocks(reading); @@ -597,7 +597,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & if (ordinary_reading_marks == 0) { reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return false; + return {}; } const auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges; @@ -631,15 +631,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & if (!best_candidate) { reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return false; + return {}; } } else { - return false; + return {}; } - Context::QualifiedProjectionName projection_name; chassert(best_candidate != nullptr); QueryPlanStepPtr projection_reading; @@ -654,12 +653,6 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & Pipe pipe(std::make_shared(std::move(candidates.minmax_projection->block))); projection_reading = std::make_unique(std::move(pipe)); has_ordinary_parts = false; - - projection_name = Context::QualifiedProjectionName - { - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = candidates.minmax_projection->candidate.projection->name, - }; } else { @@ -691,12 +684,6 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, 
QueryPlan::Nodes & projection_reading = std::make_unique(std::move(pipe)); } - projection_name = Context::QualifiedProjectionName - { - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = best_candidate->projection->name, - }; - has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; if (has_ordinary_parts) reading->setAnalyzedResult(std::move(best_candidate->merge_tree_ordinary_select_result_ptr)); @@ -746,7 +733,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & node.children.push_back(&expr_or_filter_node); } - return true; + return best_candidate->projection->name; } } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 13c6c6b0821..728aaaa6fc4 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -73,16 +73,16 @@ static bool hasAllRequiredColumns(const ProjectionDescription * projection, cons } -bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) +std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) { const auto & frame = stack.back(); auto * reading = typeid_cast(frame.node->step.get()); if (!reading) - return false; + return {}; if (!canUseProjectionForReadingStep(reading)) - return false; + return {}; auto iter = stack.rbegin(); while (std::next(iter) != stack.rend()) @@ -96,7 +96,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) /// Dangling query plan node. This might be generated by StorageMerge. if (iter->node->step.get() == reading) - return false; + return {}; const auto metadata = reading->getStorageMetadata(); const auto & projections = metadata->projections; @@ -107,7 +107,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) normal_projections.push_back(&projection); if (normal_projections.empty()) - return false; + return {}; ContextPtr context = reading->getContext(); auto it = std::find_if(normal_projections.begin(), normal_projections.end(), [&](const auto * projection) @@ -126,7 +126,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) { auto & child = iter->node->children[iter->next_child - 1]; if (!query.build(*child)) - return false; + return {}; if (query.dag) query.dag->removeUnusedActions(); @@ -146,7 +146,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (ordinary_reading_marks == 0) { reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return false; + return {}; } const auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges; @@ -185,7 +185,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (!best_candidate) { reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return false; + return {}; } auto storage_snapshot = reading->getStorageSnapshot(); @@ -283,8 +283,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) /// Here we remove last steps from stack to be able to optimize again. /// In theory, read-in-order can be applied to projection. 
stack.resize(iter.base() - stack.begin()); - - return true; + return best_candidate->projection->name; } } diff --git a/tests/queries/0_stateless/02906_force_optimize_projection_name.reference b/tests/queries/0_stateless/02906_force_optimize_projection_name.reference index 9daeafb9864..679eff3f0b4 100644 --- a/tests/queries/0_stateless/02906_force_optimize_projection_name.reference +++ b/tests/queries/0_stateless/02906_force_optimize_projection_name.reference @@ -1 +1,3 @@ test +1 +0 diff --git a/tests/queries/0_stateless/02906_force_optimize_projection_name.sql b/tests/queries/0_stateless/02906_force_optimize_projection_name.sql index 952ef8178b7..6b9d7f74f9f 100644 --- a/tests/queries/0_stateless/02906_force_optimize_projection_name.sql +++ b/tests/queries/0_stateless/02906_force_optimize_projection_name.sql @@ -1,3 +1,5 @@ +DROP TABLE IF EXISTS test; + CREATE TABLE test ( `id` UInt64, @@ -18,3 +20,16 @@ SELECT name FROM test GROUP BY name SETTINGS force_optimize_projection_name='pro SELECT name FROM test GROUP BY name SETTINGS force_optimize_projection_name='non_existing_projection'; -- { serverError 117 } SELECT name FROM test SETTINGS force_optimize_projection_name='projection_name'; -- { serverError 117 } + +INSERT INTO test SELECT number, 'test' FROM numbers(1, 100) SETTINGS force_optimize_projection_name='projection_name'; +SELECT 1 SETTINGS force_optimize_projection_name='projection_name'; + +SYSTEM FLUSH LOGS; + +SELECT read_rows FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE '%SELECT name FROM test%' + AND Settings['force_optimize_projection_name'] = 'projection_name' + AND type = 'ExceptionBeforeStart'; + +DROP TABLE test; From bbbc721508f0d85125170eea4159b830ef8fd855 Mon Sep 17 00:00:00 2001 From: Gabriel Martinez Date: Thu, 16 May 2024 22:22:30 +0100 Subject: [PATCH 318/651] chore(ci-workers): remove reusable from tailscale key --- tests/ci/worker/prepare-ci-ami.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index 86a8fae20ee..92e97865b18 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -183,7 +183,7 @@ setup_tailscale() { # Clean possible garbage from the runner type RUNNER_TYPE=${RUNNER_TYPE//[^0-9a-z]/-} TS_AUTHKEY=$(TS_API_CLIENT_ID="$TS_API_CLIENT_ID" TS_API_CLIENT_SECRET="$TS_API_CLIENT_SECRET" \ - get-authkey -tags tag:svc-core-ci-github -reusable -ephemeral) + get-authkey -tags tag:svc-core-ci-github -ephemeral) tailscale up --ssh --auth-key="$TS_AUTHKEY" --hostname="ci-runner-$RUNNER_TYPE-$INSTANCE_ID" } From 95a452f87ce816459a2bf3f66c15ec6e61bb0006 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 05:41:32 +0200 Subject: [PATCH 319/651] Update to the latest version where the patch is included --- contrib/datasketches-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/datasketches-cpp b/contrib/datasketches-cpp index 836b87e3131..76edd74f5db 160000 --- a/contrib/datasketches-cpp +++ b/contrib/datasketches-cpp @@ -1 +1 @@ -Subproject commit 836b87e313107506e15ce1f8d50241be56652e58 +Subproject commit 76edd74f5db286b672c170a8ded4ce39b3a8800f From 9dbc9f038b6e316b4227a54b4a70e1e0eb8f7361 Mon Sep 17 00:00:00 2001 From: copperybean Date: Fri, 17 May 2024 11:11:53 +0800 Subject: [PATCH 320/651] fix comments second time Change-Id: I4b75367233f99ef432cdff78f724195673755a83 --- src/Core/SettingsChangesHistory.h | 2 +- 
.../Formats/Impl/Parquet/ParquetDataValuesReader.cpp | 3 +++ .../Formats/Impl/Parquet/ParquetRecordReader.cpp | 12 ++++++++---- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 6fb8fb9358c..96ab7490c1f 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -91,6 +91,7 @@ static std::map sett {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, + {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, }}, @@ -176,7 +177,6 @@ static std::map sett {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, }}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 1f0c7105572..65f569ec264 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -29,6 +29,9 @@ void RleValuesReader::nextGroup() { cur_group_size *= 8; cur_packed_bit_values.resize(cur_group_size); + + // try to suppress clang tidy warnings by assertion + assert(bit_width < 64); bit_reader->GetBatch(bit_width, cur_packed_bit_values.data(), cur_group_size); } else diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index fddd8059925..0b797dd66ad 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -192,6 +192,7 @@ std::unique_ptr ColReaderFactory::fromByteArray() switch (col_descriptor.logical_type()->type()) { case parquet::LogicalType::Type::STRING: + case parquet::LogicalType::Type::NONE: return makeLeafReader(); default: return throwUnsupported(); @@ -204,10 +205,13 @@ std::unique_ptr ColReaderFactory::fromFLBA() { case parquet::LogicalType::Type::DECIMAL: { - if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) - return makeDecimalLeafReader(); - else if (col_descriptor.type_length() <= static_cast(sizeof(Decimal256))) - return makeDecimalLeafReader(); + if (col_descriptor.type_length() > 0) + { + if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) + return makeDecimalLeafReader(); + else if 
(col_descriptor.type_length() <= static_cast(sizeof(Decimal256))) + return makeDecimalLeafReader(); + } return throwUnsupported(PreformattedMessage::create( ", invalid type length: {}", col_descriptor.type_length())); From 56905d25ea3467f18308a57f0a21eefd23cd6553 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 05:44:28 +0200 Subject: [PATCH 321/651] Add a test --- tests/queries/0_stateless/03155_datasketches_ubsan.reference | 1 + tests/queries/0_stateless/03155_datasketches_ubsan.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/03155_datasketches_ubsan.reference create mode 100644 tests/queries/0_stateless/03155_datasketches_ubsan.sql diff --git a/tests/queries/0_stateless/03155_datasketches_ubsan.reference b/tests/queries/0_stateless/03155_datasketches_ubsan.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03155_datasketches_ubsan.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03155_datasketches_ubsan.sql b/tests/queries/0_stateless/03155_datasketches_ubsan.sql new file mode 100644 index 00000000000..40a66e37a19 --- /dev/null +++ b/tests/queries/0_stateless/03155_datasketches_ubsan.sql @@ -0,0 +1 @@ +SELECT uniqTheta(toFixedString('uniqTheta distinct', 18)) FROM (SELECT number % 2 AS x FROM numbers(10) WHERE materialize(16)); From c6e40a9288160c4cca2139c4175993b582a418b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 06:26:02 +0200 Subject: [PATCH 322/651] Better script to collect symbols statistics --- .../prepare-time-trace/prepare-time-trace.sh | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/utils/prepare-time-trace/prepare-time-trace.sh b/utils/prepare-time-trace/prepare-time-trace.sh index 2ecc1ddcd64..f5ae6772139 100755 --- a/utils/prepare-time-trace/prepare-time-trace.sh +++ b/utils/prepare-time-trace/prepare-time-trace.sh @@ -109,8 +109,19 @@ ENGINE = MergeTree ORDER BY (date, file, symbol, pull_request_number, commit_sha, check_name); /// -find "$INPUT_DIR" -type f -name '*.o' | grep -v cargo | find . -name '*.o' | xargs -P $(nproc) -I {} bash -c " - nm --demangle --defined-only --print-size '{}' | grep -v -P '[0-9a-zA-Z] r ' | sed 's@^@{} @' > '{}.symbols' -" +# nm does not work with LTO +if ! grep -q -- '-flto' compile_commands.json +then + # Find the best alternative of nm + for name in llvm-nm-{30..18} llvm-nm nm + do + NM=$(command -v ${name}) + [[ -n "${NM}" ]] && break + done -find "$INPUT_DIR" -type f -name '*.o.symbols' | xargs cat > "${OUTPUT_DIR}/binary_symbols.txt" + find "$INPUT_DIR" -type f -name '*.o' | grep -v cargo | find . 
-name '*.o' | xargs -P $(nproc) -I {} bash -c " + ${NM} --demangle --defined-only --print-size '{}' | grep -v -P '[0-9a-zA-Z] r ' | sed 's@^@{} @' > '{}.symbols' + " + + find "$INPUT_DIR" -type f -name '*.o.symbols' | xargs cat > "${OUTPUT_DIR}/binary_symbols.txt" +fi From 50dea75fff35532a7f8b1cacefd3cfc756e41f93 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 07:15:51 +0200 Subject: [PATCH 323/651] Update libunwind --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index ba533a7246a..854538ce337 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit ba533a7246a2686b0552061809612f503804d26b +Subproject commit 854538ce337d631b619010528adff22cd58f9dce From cc583185bdfe7f336af795d95cd97ce65cbef10b Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 17 May 2024 08:33:08 +0200 Subject: [PATCH 324/651] Add revision and make some formatting changes to other-functions page --- .../functions/other-functions.md | 437 ++++++++++++------ src/Functions/array/arrayUnion.cpp | 0 .../03155_function_array_clamp.sql | 11 + 3 files changed, 313 insertions(+), 135 deletions(-) create mode 100644 src/Functions/array/arrayUnion.cpp create mode 100755 tests/queries/0_stateless/03155_function_array_clamp.sql diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 11ee471d709..5b77f16027b 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -6,11 +6,21 @@ sidebar_label: Other # Other Functions -## hostName() +## hostName Returns the name of the host on which this function was executed. If the function executes on a remote server (distributed processing), the remote server name is returned. If the function executes in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +**Syntax** + +```sql +hostName() +``` + +**Returned value** + +- Host name. [String](../data-types/string.md). + ## getMacro {#getMacro} Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration. @@ -27,9 +37,7 @@ getMacro(name); **Returned value** -- Value of the specified macro. - -Type: [String](../../sql-reference/data-types/string.md). +- Value of the specified macro.[String](../../sql-reference/data-types/string.md). **Example** @@ -82,9 +90,7 @@ This function is case-insensitive. **Returned value** -- String with the fully qualified domain name. - -Type: `String`. +- String with the fully qualified domain name. [String](../data-types/string.md). **Example** @@ -163,34 +169,58 @@ Result: └────────────────┴────────────────────────────┘ ``` -## visibleWidth(x) +## visibleWidth Calculates the approximate width when outputting values to the console in text format (tab-separated). -This function is used by the system to implement Pretty formats. +This function is used by the system to implement [Pretty formats](../formats.mdx). `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. +**Syntax** + +```sql +visibleWidth(x) +``` + +**Example** + +Query: + ```sql SELECT visibleWidth(NULL) ``` +Result: + ```text ┌─visibleWidth(NULL)─┐ │ 4 │ └────────────────────┘ ``` -## toTypeName(x) +## toTypeName Returns the type name of the passed argument. 
If `NULL` is passed, then the function returns type `Nullable(Nothing)`, which corresponds to ClickHouse's internal `NULL` representation. -## blockSize() {#blockSize} +**Syntax** + +```sql +toTypeName(x) +``` + +## blockSize {#blockSize} In ClickHouse, queries are processed in blocks (chunks). This function returns the size (row count) of the block the function is called on. +**Syntax** + +```sql +blockSize() +``` + ## byteSize Returns an estimation of uncompressed byte size of its arguments in memory. @@ -207,9 +237,7 @@ byteSize(argument [, ...]) **Returned value** -- Estimation of byte size of the arguments in memory. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Estimation of byte size of the arguments in memory. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -288,16 +316,28 @@ Result: └────────────────────────────┘ ``` -## materialize(x) +## materialize Turns a constant into a full column containing a single value. Full columns and constants are represented differently in memory. Functions usually execute different code for normal and constant arguments, although the result should typically be the same. This function can be used to debug this behavior. -## ignore(…) +**Syntax** + +```sql +materialize(x) +``` + +## ignore Accepts any arguments, including `NULL` and does nothing. Always returns 0. The argument is internally still evaluated. Useful e.g. for benchmarks. +**Syntax** + +```sql +ignore(…) +``` + ## sleep Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes. @@ -392,27 +432,33 @@ The `sleepEachRow()` function is primarily used for testing and debugging purpos Like the [`sleep()` function](#sleep), it's important to use `sleepEachRow()` judiciously and only when necessary, as it can significantly impact the overall performance and responsiveness of your ClickHouse system, especially when dealing with large result sets. -## currentDatabase() +## currentDatabase Returns the name of the current database. Useful in table engine parameters of `CREATE TABLE` queries where you need to specify the database. -## currentUser() {#currentUser} +**Syntax** + +```sql +currentDatabase() +``` + +## currentUser {#currentUser} Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned. +**Syntax** + ```sql -SELECT currentUser(); +currentUser() ``` Aliases: `user()`, `USER()`, `current_user()`. Aliases are case insensitive. **Returned values** -- The name of the current user. -- In distributed queries, the login of the user who initiated the query. - -Type: `String`. +- The name of the current user. [String](../data-types/string.md). +- In distributed queries, the login of the user who initiated the query. [String](../data-types/string.md). **Example** @@ -448,10 +494,8 @@ isConstant(x) **Returned values** -- `1` if `x` is constant. -- `0` if `x` is non-constant. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `x` is constant. [UInt8](../../sql-reference/data-types/int-uint.md). +- `0` if `x` is non-constant. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** @@ -497,14 +541,26 @@ Result: └────────────────────┘ ``` -## isFinite(x) +## isFinite Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. 
-## isInfinite(x) +**Syntax** + +```sql +isFinite(x) +``` + +## isInfinite Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. +**Syntax** + +```sql +isInfinite(x) +``` + ## ifNotFinite Checks whether a floating point value is finite. @@ -517,8 +573,8 @@ ifNotFinite(x,y) **Arguments** -- `x` — Value to check for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). +- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). +- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). **Returned value** @@ -539,10 +595,16 @@ Result: You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. -## isNaN(x) +## isNaN Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0. +**Syntax** + +```sql +isNaN(x) +``` + ## hasColumnInTable Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0. @@ -733,11 +795,19 @@ LIMIT 10 └────────────────┴─────────┘ ``` -## formatReadableDecimalSize(x) +## formatReadableDecimalSize Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string. -Example: +**Syntax** + +```sql +formatReadableDecimalSize(x) +``` + +**Example** + +Query: ```sql SELECT @@ -745,6 +815,8 @@ SELECT formatReadableDecimalSize(filesize_bytes) AS filesize ``` +Result: + ```text ┌─filesize_bytes─┬─filesize───┐ │ 1 │ 1.00 B │ @@ -754,11 +826,20 @@ SELECT └────────────────┴────────────┘ ``` -## formatReadableSize(x) +## formatReadableSize Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string. -Example: +**Syntax** + +```sql +formatReadableSize(x) +``` +Alias: `FORMAT_BYTES`. + +**Example** + +Query: ```sql SELECT @@ -766,7 +847,7 @@ SELECT formatReadableSize(filesize_bytes) AS filesize ``` -Alias: `FORMAT_BYTES`. +Result: ```text ┌─filesize_bytes─┬─filesize───┐ @@ -777,11 +858,19 @@ Alias: `FORMAT_BYTES`. └────────────────┴────────────┘ ``` -## formatReadableQuantity(x) +## formatReadableQuantity Given a number, this function returns a rounded number with suffix (thousand, million, billion, etc.) as string. -Example: +**Syntax** + +```sql +formatReadableQuantity(x) +``` + +**Example** + +Query: ```sql SELECT @@ -789,6 +878,8 @@ SELECT formatReadableQuantity(number) AS number_for_humans ``` +Result: + ```text ┌─────────number─┬─number_for_humans─┐ │ 1024 │ 1.02 thousand │ @@ -903,15 +994,27 @@ SELECT parseTimeDelta('1yr2mo') └──────────────────────────┘ ``` -## least(a, b) +## least Returns the smaller value of a and b. -## greatest(a, b) +**Syntax** + +```sql +least(a, b) +``` + +## greatest Returns the larger value of a and b. -## uptime() +**Syntax** + +```sql +greatest(a, b) +``` + +## uptime Returns the server’s uptime in seconds. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. @@ -924,9 +1027,7 @@ uptime() **Returned value** -- Time value of seconds. - -Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Time value of seconds. [UInt32](/docs/en/sql-reference/data-types/int-uint.md). 
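+**Example**
+
+A minimal illustration (the result assumes all three rows arrive in a single block):
+
+Query:
+
+```sql
+SELECT rowNumberInBlock() FROM numbers(3);
+```
+
+Result:
+
+```response
+┌─rowNumberInBlock()─┐
+│                  0 │
+│                  1 │
+│                  2 │
+└────────────────────┘
+```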
**Example** @@ -944,7 +1045,7 @@ Result: └────────┘ ``` -## version() +## version Returns the current version of ClickHouse as a string in the form of: @@ -971,7 +1072,7 @@ None. **Returned value** -Type: [String](../data-types/string) +- Current version of ClickHouse. [String](../data-types/string) **Implementation details** @@ -993,23 +1094,47 @@ SELECT version() └───────────┘ ``` -## buildId() +## buildId Returns the build ID generated by a compiler for the running ClickHouse server binary. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. -## blockNumber() +**Syntax** + +```sql +buildId() +``` + +## blockNumber Returns the sequence number of the data block where the row is located. -## rowNumberInBlock() {#rowNumberInBlock} +**Syntax** + +```sql +blockNumber() +``` + +## rowNumberInBlock {#rowNumberInBlock} Returns the ordinal number of the row in the data block. Different data blocks are always recalculated. -## rowNumberInAllBlocks() +**Syntax** + +```sql +rowNumberInBlock() +``` + +## rowNumberInAllBlocks Returns the ordinal number of the row in the data block. This function only considers the affected data blocks. +**Syntax** + +```sql +rowNumberInAllBlocks() +``` + ## neighbor The window function that provides access to a row at a specified offset before or after the current row of a given column. @@ -1128,7 +1253,7 @@ Result: └────────────┴───────┴───────────┴────────────────┘ ``` -## runningDifference(x) {#runningDifference} +## runningDifference {#runningDifference} Calculates the difference between two consecutive row values in the data block. Returns 0 for the first row, and for subsequent rows the difference to the previous row. @@ -1143,7 +1268,15 @@ The result of the function depends on the affected data blocks and the order of The order of rows during calculation of `runningDifference()` can differ from the order of rows returned to the user. To prevent that you can create a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. -Example: +**Syntax** + +```sql +runningDifference(x) +``` + +**Example** + +Query: ```sql SELECT @@ -1162,6 +1295,8 @@ FROM ) ``` +Result: + ```text ┌─EventID─┬───────────EventTime─┬─delta─┐ │ 1106 │ 2016-11-24 00:00:04 │ 0 │ @@ -1174,6 +1309,8 @@ FROM Please note that the block size affects the result. The internal state of `runningDifference` state is reset for each new block. +Query: + ```sql SELECT number, @@ -1182,6 +1319,8 @@ FROM numbers(100000) WHERE diff != 1 ``` +Result: + ```text ┌─number─┬─diff─┐ │ 0 │ 0 │ @@ -1191,6 +1330,8 @@ WHERE diff != 1 └────────┴──────┘ ``` +Query: + ```sql set max_block_size=100000 -- default value is 65536! @@ -1201,6 +1342,8 @@ FROM numbers(100000) WHERE diff != 1 ``` +Result: + ```text ┌─number─┬─diff─┐ │ 0 │ 0 │ @@ -1238,9 +1381,7 @@ runningConcurrency(start, end) **Returned values** -- The number of concurrent events at each event start time. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md) +- The number of concurrent events at each event start time. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -1272,23 +1413,43 @@ Result: └────────────┴────────────────────────────────┘ ``` -## MACNumToString(num) +## MACNumToString Interprets a UInt64 number as a MAC address in big endian format. 
Returns the corresponding MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form) as string.

-## MACStringToNum(s)
+**Syntax**
+
+```sql
+MACNumToString(num)
+```
+
+## MACStringToNum

The inverse function of MACNumToString. If the MAC address has an invalid format, it returns 0.

-## MACStringToOUI(s)
+**Syntax**
+
+```sql
+MACStringToNum(s)
+```
+
+## MACStringToOUI

Given a MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form), returns the first three octets as a UInt64 number. If the MAC address has an invalid format, it returns 0.

+**Syntax**
+
+```sql
+MACStringToOUI(s)
+```
+
## getSizeOfEnumType

Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). An exception is thrown if the type is not `Enum`.

+**Syntax**
+
```sql
getSizeOfEnumType(value)
```
@@ -1349,6 +1510,8 @@ Result:

Returns the internal name of the data type that represents the value.

+**Syntax**
+
```sql
toColumnTypeName(value)
```
@@ -1427,6 +1590,8 @@ Returns the default value for the given data type.

Does not include default values for custom columns set by the user.

+**Syntax**
+
```sql
defaultValueOfArgumentType(expression)
```
@@ -1625,29 +1790,31 @@ Result:

Creates an array with a single value.

-Used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin).
+:::note
+This function is used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin).
+:::
+
+**Syntax**

```sql
-SELECT replicate(x, arr);
+replicate(x, arr)
```

-**Arguments:**
+**Arguments**

-- `arr` — An array.
- `x` — The value to fill the result array with.
+- `arr` — An array. [Array](../data-types/array.md).

**Returned value**

-An array of the lame length as `arr` filled with value `x`.
-
-Type: `Array`.
+An array of the same length as `arr` filled with value `x`. [Array](../data-types/array.md).

**Example**

Query:

```sql
-SELECT replicate(1, ['a', 'b', 'c'])
+SELECT replicate(1, ['a', 'b', 'c']);
```

Result:

@@ -1658,6 +1825,36 @@ Result:
└───────────────────────────────┘
```

+## revision
+
+Returns the current ClickHouse [server revision](../../operations/system-tables/metrics#revision).
+
+**Syntax**
+
+```sql
+revision()
+```
+
+**Returned value**
+
+- The current ClickHouse server revision. [UInt32](../data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT revision();
+```
+
+Result:
+
+```response
+┌─revision()─┐
+│      54485 │
+└────────────┘
+```
+
## filesystemAvailable

Returns the amount of free space in the filesystem hosting the database persistence. The returned value is always smaller than total free space ([filesystemFree](#filesystemfree)) because some space is reserved for the operating system.

+**Syntax**
+
```sql
filesystemAvailable()
```

**Returned value**

-- The amount of remaining space available in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- The amount of remaining space available in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).

**Example**

@@ -1702,9 +1897,7 @@ filesystemFree()

**Returned value**

-- The amount of free space in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- The amount of free space in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).

**Example**

@@ -1734,9 +1927,7 @@ filesystemCapacity()

**Returned value**

-- Capacity of the filesystem in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- Capacity of the filesystem in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).

**Example**

@@ -2100,7 +2291,7 @@ Result:
└──────────────────────────────────────────────────┘
```

-## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n)
+## catboostEvaluate

:::note
This function is not available in ClickHouse Cloud.
:::

Evaluate an external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning.
Accepts a path to a catboost model and model arguments (features). Returns Float64.

+**Syntax**
+
+```sql
+catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n)
+```
+
+**Example**
+
```sql
SELECT feat1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction
FROM data_table
```

@@ -2145,10 +2344,16 @@ communicate using a HTTP interface. By default, port `9012` is used. A different

See [Training and applying models](https://catboost.ai/docs/features/training.html#training) for how to train catboost models from a training data set.

-## throwIf(x\[, message\[, error_code\]\])
+## throwIf

Throws an exception if argument `x` is true.

+**Syntax**
+
+```sql
+throwIf(x[, message[, error_code]])
+```
+
**Arguments**

- `x` - the condition to check.

@@ -2284,9 +2489,7 @@ countDigits(x)

**Returned value**

-Number of digits.
-
-Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
+- Number of digits. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).

:::note
For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDigits(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow).
:::

@@ -2310,9 +2513,7 @@ Result:

## errorCodeToName

-Returns the textual name of an error code.
-
-Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md).
+- Returns the textual name of an error code. [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md).

**Syntax**

@@ -2343,9 +2544,7 @@ tcpPort()

**Returned value**

-- The TCP port number.
-
-Type: [UInt16](../../sql-reference/data-types/int-uint.md).
+- The TCP port number. [UInt16](../../sql-reference/data-types/int-uint.md).

**Example**

@@ -2381,9 +2580,7 @@ currentProfiles()

**Returned value**

-- List of the current user settings profiles.
-
-Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+- List of the current user settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

## enabledProfiles

@@ -2397,9 +2594,7 @@ enabledProfiles()

**Returned value**

-- List of the enabled settings profiles.
-
-Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+- List of the enabled settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

## defaultProfiles

@@ -2413,9 +2608,7 @@ defaultProfiles()

**Returned value**

-- List of the default settings profiles.
-
-Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+- List of the default settings profiles. 
[Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## currentRoles @@ -2429,9 +2622,7 @@ currentRoles() **Returned value** -- A list of the current roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- A list of the current roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## enabledRoles @@ -2445,9 +2636,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## defaultRoles @@ -2461,9 +2650,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## getServerPort @@ -2492,9 +2679,7 @@ getServerPort(port_name) **Returned value** -- The number of the server port. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The number of the server port. [UInt16](../../sql-reference/data-types/int-uint.md). **Example** @@ -2526,9 +2711,7 @@ queryID() **Returned value** -- The ID of the current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the current query. [String](../../sql-reference/data-types/string.md). **Example** @@ -2562,9 +2745,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the initial current query. [String](../../sql-reference/data-types/string.md). **Example** @@ -2597,9 +2778,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Shard index or constant `0`. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -2639,9 +2818,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Total number of shards or `0`. [UInt32](../../sql-reference/data-types/int-uint.md). **See Also** @@ -2663,9 +2840,7 @@ getOSKernelVersion() **Returned value** -- The current OS kernel version. - -Type: [String](../../sql-reference/data-types/string.md). +- The current OS kernel version. [String](../../sql-reference/data-types/string.md). **Example** @@ -2699,9 +2874,7 @@ zookeeperSessionUptime() **Returned value** -- Uptime of the current ZooKeeper session in seconds. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Uptime of the current ZooKeeper session in seconds. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -2738,9 +2911,7 @@ All arguments must be constant. **Returned value** -- Randomly generated table structure. - -Type: [String](../../sql-reference/data-types/string.md). +- Randomly generated table structure. [String](../../sql-reference/data-types/string.md). **Examples** @@ -2807,9 +2978,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema - -Type: [String](../../sql-reference/data-types/string.md). 
+- CapnProto schema. [String](../../sql-reference/data-types/string.md). **Examples** @@ -2908,9 +3077,7 @@ structureToProtobufSchema(structure) **Returned value** -- Protobuf schema - -Type: [String](../../sql-reference/data-types/string.md). +- Protobuf schema. [String](../../sql-reference/data-types/string.md). **Examples** diff --git a/src/Functions/array/arrayUnion.cpp b/src/Functions/array/arrayUnion.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03155_function_array_clamp.sql b/tests/queries/0_stateless/03155_function_array_clamp.sql new file mode 100755 index 00000000000..4794dafda4b --- /dev/null +++ b/tests/queries/0_stateless/03155_function_array_clamp.sql @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-ordinary-database, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# In previous versions this command took longer than ten minutes. Now it takes less than a second in release mode: + +python3 -c 'import sys; import struct; sys.stdout.buffer.write(b"".join(struct.pack(" Date: Fri, 17 May 2024 08:44:25 +0200 Subject: [PATCH 325/651] Remove files which shouldn't be on this branch --- src/Functions/array/arrayUnion.cpp | 0 .../0_stateless/03155_function_array_clamp.sql | 11 ----------- 2 files changed, 11 deletions(-) delete mode 100644 src/Functions/array/arrayUnion.cpp delete mode 100755 tests/queries/0_stateless/03155_function_array_clamp.sql diff --git a/src/Functions/array/arrayUnion.cpp b/src/Functions/array/arrayUnion.cpp deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03155_function_array_clamp.sql b/tests/queries/0_stateless/03155_function_array_clamp.sql deleted file mode 100755 index 4794dafda4b..00000000000 --- a/tests/queries/0_stateless/03155_function_array_clamp.sql +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-ordinary-database, long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -# In previous versions this command took longer than ten minutes. Now it takes less than a second in release mode: - -python3 -c 'import sys; import struct; sys.stdout.buffer.write(b"".join(struct.pack(" Date: Fri, 17 May 2024 08:57:10 +0200 Subject: [PATCH 326/651] Update gui.md - Add ch-ui to open-source available tools. - Added ch-ui to the list of tools for managing ClickHouse databases under "Open source tools" --- docs/en/interfaces/third-party/gui.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index bb326dc5242..0d888812cd5 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -222,6 +222,17 @@ Features: - Useful tools: Zookeeper data exploration, query EXPLAIN, kill queries, etc. - Visualization metric charts: queries and resource usage, number of merges/mutation, merge performance, query performance, etc. +### ch-ui {#ch-ui} + +[ch-ui](https://github.com/caioricciuti/ch-ui) is a simple React.js app interface for ClickHouse databases, designed for executing queries and visualizing data. Built with React and the ClickHouse client for web, it offers a sleek and user-friendly UI for easy database interactions. + +Features: + +- ClickHouse Integration: Easily manage connections and execute queries. 
+- Responsive Tab Management: Dynamically handle multiple tabs, such as query and table tabs. +- Performance Optimizations: Utilize IndexedDB for efficient caching and state management. +- Local Data Storage: All data is stored locally in the browser, ensuring no data is sent anywhere else. + ## Commercial {#commercial} ### DataGrip {#datagrip} From 7d6d5165fe62a141b500eab053eb0476018df263 Mon Sep 17 00:00:00 2001 From: qiangxuhui Date: Fri, 17 May 2024 07:37:28 +0000 Subject: [PATCH 327/651] Update contrib/libunwind to master --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index fa519913b9c..854538ce337 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit fa519913b9c157663824bee95c5737669a40eaf3 +Subproject commit 854538ce337d631b619010528adff22cd58f9dce From 04f631dec9743da6655586073c28c895cfd7cd76 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 17 May 2024 10:13:13 +0200 Subject: [PATCH 328/651] Better --- src/Core/SettingsQuirks.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 73a0e2a9a6c..5541cc19653 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -120,8 +120,8 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) "input_format_parquet_max_block_size"}; for (auto const & setting : block_rows_settings) { - auto block_size = get_current_value(setting).get(); - if (block_size > max_sane_block_rows_size) + if (auto block_size = get_current_value(setting).get(); + block_size > max_sane_block_rows_size) { if (log) LOG_WARNING(log, "Sanity check: '{}' value is too high ({}). Reduced to {}", setting, block_size, max_sane_block_rows_size); From 26501178e63d063a39581f059d9bf7c2b63f7dc9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 10:23:32 +0200 Subject: [PATCH 329/651] Fix analyzer --- docker/test/fuzzer/run-fuzzer.sh | 2 +- docs/changelogs/v22.11.1.1360-stable.md | 2 +- docs/changelogs/v22.8.1.2097-lts.md | 2 +- docs/changelogs/v23.10.1.1976-stable.md | 2 +- .../operations/utilities/clickhouse-keeper-client.md | 2 +- programs/client/Client.cpp | 2 +- programs/keeper-client/Commands.cpp | 2 +- src/Analyzer/Passes/QueryAnalysisPass.cpp | 12 ++++++------ src/Common/Logger.h | 4 ++-- src/Databases/MySQL/DatabaseMySQL.cpp | 2 +- src/Dictionaries/SSDCacheDictionaryStorage.h | 2 +- src/Interpreters/Context.cpp | 2 +- src/Planner/TableExpressionData.h | 4 ++-- src/Server/TCPHandler.cpp | 2 +- .../Distributed/DistributedAsyncInsertBatch.cpp | 2 +- .../DistributedAsyncInsertDirectoryQueue.cpp | 2 +- src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp | 2 +- src/Storages/MergeTree/IMergeTreeReader.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp | 10 +++++----- .../MergeTree/MergeTreeIndexBloomFilterText.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeIndexFullText.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeReaderCompact.cpp | 2 +- src/Storages/StorageMaterializedView.cpp | 6 +++--- tests/integration/test_access_for_functions/test.py | 2 +- .../test.py | 2 +- .../test_user_defined_object_persistence/test.py | 4 ++-- .../queries/0_stateless/02722_database_filesystem.sh | 2 +- .../03096_text_log_format_string_args_not_empty.sql | 2 +- 29 files changed, 45 insertions(+), 45 deletions(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh 
b/docker/test/fuzzer/run-fuzzer.sh index d1ccdea304b..b8f967ed9c2 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -138,7 +138,7 @@ function filter_exists_and_template # but it doesn't allow to use regex echo "$path" | sed 's/\.sql\.j2$/.gen.sql/' else - echo "'$path' does not exists" >&2 + echo "'$path' does not exist" >&2 fi done } diff --git a/docs/changelogs/v22.11.1.1360-stable.md b/docs/changelogs/v22.11.1.1360-stable.md index 4aa110484f8..4acaffb2c3b 100644 --- a/docs/changelogs/v22.11.1.1360-stable.md +++ b/docs/changelogs/v22.11.1.1360-stable.md @@ -93,7 +93,7 @@ sidebar_label: 2022 * `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). * Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). * Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). -* Fix possible SIGSEGV for web disks when file does not exists (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible SIGSEGV for web disks when file does not exist (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). * Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). * Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). * Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.8.1.2097-lts.md b/docs/changelogs/v22.8.1.2097-lts.md index b6b634f4826..f9a1fa8a4a9 100644 --- a/docs/changelogs/v22.8.1.2097-lts.md +++ b/docs/changelogs/v22.8.1.2097-lts.md @@ -53,7 +53,7 @@ sidebar_label: 2022 * Store Keeper API version inside a predefined path. [#39096](https://github.com/ClickHouse/ClickHouse/pull/39096) ([Antonio Andelic](https://github.com/antonio2368)). * Now entrypoint.sh in docker image creates and executes chown for all folders it found in config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Add profile events for fsync. [#39179](https://github.com/ClickHouse/ClickHouse/pull/39179) ([Azat Khuzhin](https://github.com/azat)). -* Add the second argument to the ordinary function `file(path[, default])`, which function returns in the case when a file does not exists. [#39218](https://github.com/ClickHouse/ClickHouse/pull/39218) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add the second argument to the ordinary function `file(path[, default])`, which function returns in the case when a file does not exist. 
[#39218](https://github.com/ClickHouse/ClickHouse/pull/39218) ([Nikolay Degterinsky](https://github.com/evillique)). * Some small fixes for reading via http, allow to retry partial content in case if got 200OK. [#39244](https://github.com/ClickHouse/ClickHouse/pull/39244) ([Kseniia Sumarokova](https://github.com/kssenii)). * Improved Base58 encoding/decoding. [#39292](https://github.com/ClickHouse/ClickHouse/pull/39292) ([Andrey Zvonov](https://github.com/zvonand)). * Normalize `AggregateFunction` types and state representations because optimizations like https://github.com/ClickHouse/ClickHouse/pull/35788 will treat `count(not null columns)` as `count()`, which might confuses distributed interpreters with the following error : `Conversion from AggregateFunction(count) to AggregateFunction(count, Int64) is not supported`. [#39420](https://github.com/ClickHouse/ClickHouse/pull/39420) ([Amos Bird](https://github.com/amosbird)). diff --git a/docs/changelogs/v23.10.1.1976-stable.md b/docs/changelogs/v23.10.1.1976-stable.md index b08383a859b..4d093f934f1 100644 --- a/docs/changelogs/v23.10.1.1976-stable.md +++ b/docs/changelogs/v23.10.1.1976-stable.md @@ -291,7 +291,7 @@ sidebar_label: 2023 * Fix replica groups for Replicated database engine [#55587](https://github.com/ClickHouse/ClickHouse/pull/55587) ([Azat Khuzhin](https://github.com/azat)). * Remove unused protobuf includes [#55590](https://github.com/ClickHouse/ClickHouse/pull/55590) ([Raúl Marín](https://github.com/Algunenano)). * Apply Context changes to standalone Keeper [#55591](https://github.com/ClickHouse/ClickHouse/pull/55591) ([Antonio Andelic](https://github.com/antonio2368)). -* Do not fail if label-to-remove does not exists in PR [#55592](https://github.com/ClickHouse/ClickHouse/pull/55592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Do not fail if label-to-remove does not exist in PR [#55592](https://github.com/ClickHouse/ClickHouse/pull/55592) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * CI: cast extra column expression `pull_request_number` to Int32 [#55599](https://github.com/ClickHouse/ClickHouse/pull/55599) ([Han Fei](https://github.com/hanfei1991)). * Add back a test that was removed by mistake [#55605](https://github.com/ClickHouse/ClickHouse/pull/55605) ([Alexander Tokmakov](https://github.com/tavplubix)). * Bump croaring to v2.0.4 [#55606](https://github.com/ClickHouse/ClickHouse/pull/55606) ([Robert Schulze](https://github.com/rschu1ze)). 
diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index 4588f68cacd..6407c66783b 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -35,7 +35,7 @@ api_version /keeper/api_version :) ls /keeper/api_version :) cd xyz -Path /keeper/api_version/xyz does not exists +Path /keeper/api_version/xyz does not exist /keeper/api_version :) cd ../../ / :) ls keeper foo bar diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 396cd3e646b..01ed7d70b38 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -263,7 +263,7 @@ void Client::initialize(Poco::Util::Application & self) config().add(loaded_config.configuration); } else if (config().has("connection")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "--connection was specified, but config does not exists"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--connection was specified, but config does not exist"); /** getenv is thread-safe in Linux glibc and in all sane libc implementations. * But the standard does not guarantee that subsequent calls will not rewrite the value by returned pointer. diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index ec5eaf5070c..a109912e6e0 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -63,7 +63,7 @@ void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con auto new_path = client->getAbsolutePath(query->args[0].safeGet()); if (!client->zookeeper->exists(new_path)) - std::cerr << "Path " << new_path << " does not exists\n"; + std::cerr << "Path " << new_path << " does not exist\n"; else client->cwd = new_path; } diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index bfe2f981c63..dae17e18b85 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5090,14 +5090,14 @@ ProjectionName QueryAnalyzer::resolveWindow(QueryTreeNodePtr & node, IdentifierR auto * nearest_query_scope = scope.getNearestQueryScope(); if (!nearest_query_scope) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window '{}' does not exists.", parent_window_name); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window '{}' does not exist.", parent_window_name); auto & scope_window_name_to_window_node = nearest_query_scope->window_name_to_window_node; auto window_node_it = scope_window_name_to_window_node.find(parent_window_name); if (window_node_it == scope_window_name_to_window_node.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Window '{}' does not exists. In scope {}", + "Window '{}' does not exist. In scope {}", parent_window_name, nearest_query_scope->scope_node->formatASTForErrorMessage()); @@ -5861,7 +5861,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi { if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name)) { - throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Aggregate function with name '{}' does not exists. In scope {}{}", + throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Aggregate function with name '{}' does not exist. 
In scope {}{}", function_name, scope.scope_node->formatASTForErrorMessage(), getHintsErrorMessageSuffix(AggregateFunctionFactory::instance().getHints(function_name))); } @@ -5942,7 +5942,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto hints = NamePrompter<2>::getHints(function_name, possible_function_names); throw Exception(ErrorCodes::UNKNOWN_FUNCTION, - "Function with name '{}' does not exists. In scope {}{}", + "Function with name '{}' does not exist. In scope {}{}", function_name, scope.scope_node->formatASTForErrorMessage(), getHintsErrorMessageSuffix(hints)); @@ -8070,7 +8070,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier auto window_node_it = scope.window_name_to_window_node.find(parent_window_name); if (window_node_it == scope.window_name_to_window_node.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Window '{}' does not exists. In scope {}", + "Window '{}' does not exist. In scope {}", parent_window_name, scope.scope_node->formatASTForErrorMessage()); @@ -8268,7 +8268,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (!has_node_in_alias_table) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Node {} with duplicate alias {} does not exists in alias table. In scope {}", + "Node {} with duplicate alias {} does not exist in alias table. In scope {}", node->formatASTForErrorMessage(), node_alias, scope.scope_node->formatASTForErrorMessage()); diff --git a/src/Common/Logger.h b/src/Common/Logger.h index b4688eb0a9c..b54ccd33e72 100644 --- a/src/Common/Logger.h +++ b/src/Common/Logger.h @@ -26,12 +26,12 @@ using LoggerRawPtr = Poco::Logger *; * Then it must be destroyed when underlying table is destroyed. */ -/** Get Logger with specified name. If the Logger does not exists, it is created. +/** Get Logger with specified name. If the Logger does not exist, it is created. * Logger is destroyed, when last shared ptr that refers to Logger with specified name is destroyed. */ LoggerPtr getLogger(const std::string & name); -/** Get Logger with specified name. If the Logger does not exists, it is created. +/** Get Logger with specified name. If the Logger does not exist, it is created. * This overload was added for specific purpose, when logger is constructed from constexpr string. * Logger is destroyed only during program shutdown. 
*/ diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 84dd385e191..1c82131af0d 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -440,7 +440,7 @@ void DatabaseMySQL::detachTablePermanently(ContextPtr, const String & table_name throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); if (fs::exists(remove_flag)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "The remove flag file already exists but the {}.{} does not exists remove tables, it is bug.", + throw Exception(ErrorCodes::LOGICAL_ERROR, "The remove flag file already exists but the {}.{} does not exist remove tables, it is bug.", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); auto table_iter = local_tables_cache.find(table_name); diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index b23f9617ec8..e3eea71cd9a 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -470,7 +470,7 @@ public: auto path = std::filesystem::path{file_path}; auto parent_path_directory = path.parent_path(); - /// If cache file is in directory that does not exists create it + /// If cache file is in directory that does not exist create it if (!std::filesystem::exists(parent_path_directory)) if (!std::filesystem::create_directories(parent_path_directory)) throw Exception(ErrorCodes::CANNOT_CREATE_DIRECTORY, "Failed to create directories."); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 547ea7ec566..dc612e44642 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1641,7 +1641,7 @@ void Context::updateExternalTable(const String & table_name, std::shared_ptrsecond = std::move(temporary_table); } diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index 9ab7a8e64fe..9723a00a356 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -137,7 +137,7 @@ public: if (it == column_name_to_column.end()) { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column for column name {} does not exists. There are only column names: {}", + "Column for column name {} does not exist. There are only column names: {}", column_name, fmt::join(column_names.begin(), column_names.end(), ", ")); } @@ -154,7 +154,7 @@ public: if (it == column_name_to_column_identifier.end()) { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Column identifier for column name {} does not exists. There are only column names: {}", + "Column identifier for column name {} does not exist. There are only column names: {}", column_name, fmt::join(column_names.begin(), column_names.end(), ", ")); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index e1ddd89322f..3db935729b4 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1107,7 +1107,7 @@ void TCPHandler::processTablesStatusRequest() ContextPtr context_to_resolve_table_names; if (is_interserver_mode) { - /// In interserver mode session context does not exists, because authentication is done for each query. + /// In the interserver mode session context does not exist, because authentication is done for each query. /// We also cannot create query context earlier, because it cannot be created before authentication, /// but query is not received yet. So we have to do this trick. 
ContextMutablePtr fake_interserver_context = Context::createCopy(server.context()); diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 2d052255ac5..06d4c185840 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -173,7 +173,7 @@ bool DistributedAsyncInsertBatch::valid() { if (!fs::exists(file)) { - LOG_WARNING(parent.log, "File {} does not exists, likely due abnormal shutdown", file); + LOG_WARNING(parent.log, "File {} does not exist, likely due abnormal shutdown", file); res = false; } } diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 1ee77611191..14866c25365 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -554,7 +554,7 @@ void DistributedAsyncInsertDirectoryQueue::processFilesWithBatching(const Settin { if (!fs::exists(file_path)) { - LOG_WARNING(log, "File {} does not exists, likely due to current_batch.txt processing", file_path); + LOG_WARNING(log, "File {} does not exist, likely due to current_batch.txt processing", file_path); continue; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 052e3ba4b74..82af6c1fbe8 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -580,7 +580,7 @@ void DataPartStorageOnDiskBase::rename( disk.setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); disk.moveDirectory(from, to); - /// Only after moveDirectory() since before the directory does not exists. + /// Only after moveDirectory() since before the directory does not exist. SyncGuardPtr to_sync_guard; if (fsync_part_dir) to_sync_guard = volume->getDisk()->getDirectorySyncGuard(to); diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index a5b84eba241..a1ec0339fd6 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -100,7 +100,7 @@ protected: /// Position and level (of nesting). using ColumnNameLevel = std::optional>; - /// In case of part of the nested column does not exists, offsets should be + /// In case of part of the nested column does not exist, offsets should be /// read, but only the offsets for the current column, that is why it /// returns pair of size_t, not just one. 
ColumnNameLevel findColumnForOffsets(const NameAndTypePair & column) const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f448a9a820d..167160db317 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5126,7 +5126,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String auto volume = getStoragePolicy()->getVolumeByName(name); if (!volume) - throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exists on policy {}", name, getStoragePolicy()->getName()); + throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exist on policy {}", name, getStoragePolicy()->getName()); if (parts.empty()) throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Nothing to move (check that the partition exists)."); diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 4b164f5ac42..f5569d95a1b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -490,11 +490,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeIn( if (key_node_function_name == "arrayElement") { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] IN ('') because if key does not exists in map - * we return default value for arrayElement. + * It is important to ignore keys like column_map['Key'] IN ('') because if the key does not exist in the map + * we return the default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because - * that way we skip necessary granules where map key does not exists. + * that way we skip necessary granules where the map key does not exist. */ if (!prepared_set) return false; @@ -781,11 +781,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( if (key_node_function_name == "arrayElement" && (function_name == "equals" || function_name == "notEquals")) { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map * we return default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because - * that way we skip necessary granules where map key does not exists. + * that way we skip necessary granules where map key does not exist. */ if (value_field == value_type->getDefault()) return false; diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 826b149cf01..82a0e0f8cd1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -444,11 +444,11 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( if (key_function_node_function_name == "arrayElement") { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map * we return default value for arrayElement. 
* * We cannot skip keys that does not exist in map if comparison is with default type value because - * that way we skip necessary granules where map key does not exists. + * that way we skip necessary granules where map key does not exist. */ if (value_field == value_type->getDefault()) return false; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 9535cf18127..cea260e0a76 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -477,11 +477,11 @@ bool MergeTreeConditionFullText::traverseASTEquals( if (function.getFunctionName() == "arrayElement") { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exists in map + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map * we return default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because - * that way we skip necessary granules where map key does not exists. + * that way we skip necessary granules where map key does not exist. */ if (value_field == value_type->getDefault()) return false; diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 643a1c31474..34fb214a1ce 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -232,7 +232,7 @@ bool MergeTreeReaderCompact::needSkipStream(size_t column_pos, const ISerializat /// /// Consider the following columns in nested "root": /// - root.array Array(UInt8) - exists - /// - root.nested_array Array(Array(UInt8)) - does not exists (only_offsets_level=1) + /// - root.nested_array Array(Array(UInt8)) - does not exist (only_offsets_level=1) /// /// For root.nested_array it will try to read multiple streams: /// - offsets (substream_path = {ArraySizes}) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 4f2790ac24a..bfb35f72b72 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -58,7 +58,7 @@ static inline String generateInnerTableName(const StorageID & view_id) return ".inner." + view_id.getTableName(); } -/// Remove columns from target_header that does not exists in src_header +/// Remove columns from target_header that does not exist in src_header static void removeNonCommonColumns(const Block & src_header, Block & target_header) { std::set target_only_positions; @@ -233,10 +233,10 @@ void StorageMaterializedView::read( auto mv_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, context, processed_stage); auto target_header = query_plan.getCurrentDataStream().header; - /// No need to convert columns that does not exists in MV + /// No need to convert columns that does not exist in MV removeNonCommonColumns(mv_header, target_header); - /// No need to convert columns that does not exists in the result header. + /// No need to convert columns that does not exist in the result header. 
/// /// Distributed storage may process query up to the specific stage, and /// so the result header may not include all the columns from the diff --git a/tests/integration/test_access_for_functions/test.py b/tests/integration/test_access_for_functions/test.py index 3e58c961421..004d39e1dea 100644 --- a/tests/integration/test_access_for_functions/test.py +++ b/tests/integration/test_access_for_functions/test.py @@ -42,7 +42,7 @@ def test_access_rights_for_function(): function_resolution_error = instance.query_and_get_error("SELECT MySum(1, 2)") assert ( "Unknown function MySum" in function_resolution_error - or "Function with name 'MySum' does not exists." in function_resolution_error + or "Function with name 'MySum' does not exist." in function_resolution_error ) instance.query("REVOKE CREATE FUNCTION ON *.* FROM A") diff --git a/tests/integration/test_sql_user_defined_functions_on_cluster/test.py b/tests/integration/test_sql_user_defined_functions_on_cluster/test.py index 0bf03f545be..dc36a7b32b8 100644 --- a/tests/integration/test_sql_user_defined_functions_on_cluster/test.py +++ b/tests/integration/test_sql_user_defined_functions_on_cluster/test.py @@ -28,7 +28,7 @@ def test_sql_user_defined_functions_on_cluster(): error_message = node.query_and_get_error("SELECT test_function(1);") assert ( "Unknown function test_function" in error_message - or "Function with name 'test_function' does not exists. In scope SELECT test_function(1)" + or "Function with name 'test_function' does not exist. In scope SELECT test_function(1)" in error_message ) diff --git a/tests/integration/test_user_defined_object_persistence/test.py b/tests/integration/test_user_defined_object_persistence/test.py index 1919da0726e..986438a4eed 100644 --- a/tests/integration/test_user_defined_object_persistence/test.py +++ b/tests/integration/test_user_defined_object_persistence/test.py @@ -38,13 +38,13 @@ def test_persistence(): error_message = instance.query_and_get_error("SELECT MySum1(1, 2)") assert ( "Unknown function MySum1" in error_message - or "Function with name 'MySum1' does not exists. In scope SELECT MySum1(1, 2)" + or "Function with name 'MySum1' does not exist. In scope SELECT MySum1(1, 2)" in error_message ) error_message = instance.query_and_get_error("SELECT MySum2(1, 2)") assert ( "Unknown function MySum2" in error_message - or "Function with name 'MySum2' does not exists. In scope SELECT MySum2(1, 2)" + or "Function with name 'MySum2' does not exist. 
In scope SELECT MySum2(1, 2)" in error_message ) diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 0c75c15fc69..374dd246c96 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -72,7 +72,7 @@ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('/tmp'); """ 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: -# BAD_ARGUMENTS: .../user_files/relative_unknown_dir does not exists +# BAD_ARGUMENTS: .../user_files/relative_unknown_dir does not exist ${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ DROP DATABASE IF EXISTS test2; CREATE DATABASE test2 ENGINE = Filesystem('relative_unknown_dir'); diff --git a/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql b/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql index 0f4a217a4ae..cffc8a49c67 100644 --- a/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql +++ b/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql @@ -10,4 +10,4 @@ select count() > 0 from system.text_log where message_format_string = 'Peak memo select count() > 0 from system.text_log where level = 'Error' and message_format_string = 'Unknown {}{} identifier \'{}\' in scope {}{}' and value1 = 'expression' and value3 = 'count' and value4 = 'SELECT count'; -select count() > 0 from system.text_log where level = 'Error' and message_format_string = 'Function with name \'{}\' does not exists. In scope {}{}' and value1 = 'conut' and value2 = 'SELECT conut()' and value3 ilike '%\'count\'%'; +select count() > 0 from system.text_log where level = 'Error' and message_format_string = 'Function with name \'{}\' does not exist. 
In scope {}{}' and value1 = 'conut' and value2 = 'SELECT conut()' and value3 ilike '%\'count\'%'; From 64a5bbe68342624394112cf360802f47522ea517 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 10:24:37 +0200 Subject: [PATCH 330/651] Update test --- tests/queries/0_stateless/03155_datasketches_ubsan.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03155_datasketches_ubsan.sql b/tests/queries/0_stateless/03155_datasketches_ubsan.sql index 40a66e37a19..521301d03f7 100644 --- a/tests/queries/0_stateless/03155_datasketches_ubsan.sql +++ b/tests/queries/0_stateless/03155_datasketches_ubsan.sql @@ -1 +1,2 @@ +-- Tags: no-fasttest SELECT uniqTheta(toFixedString('uniqTheta distinct', 18)) FROM (SELECT number % 2 AS x FROM numbers(10) WHERE materialize(16)); From b53e9eec7b6560ebb67a5d868689494a7f0ab008 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 16 May 2024 18:17:46 +0200 Subject: [PATCH 331/651] Support for archives (unfinished) --- src/IO/S3/URI.h | 1 + .../ObjectStorage/ReadBufferIterator.cpp | 34 ++-- .../ObjectStorage/S3/Configuration.cpp | 8 + src/Storages/ObjectStorage/S3/Configuration.h | 3 + .../ObjectStorage/StorageObjectStorage.cpp | 10 ++ .../ObjectStorage/StorageObjectStorage.h | 4 + .../StorageObjectStorageSource.cpp | 146 +++++++++++++++++- .../StorageObjectStorageSource.h | 70 ++++++++- 8 files changed, 255 insertions(+), 21 deletions(-) diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index c52e6bc1441..363f98c46f5 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -29,6 +29,7 @@ struct URI std::string key; std::string version_id; std::string storage_name; + /// Path (or path pattern) in archive if uri is an archive. std::optional archive_pattern; std::string uri_str; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 3705725ffe1..61575b0115a 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -244,22 +245,35 @@ ReadBufferIterator::Data ReadBufferIterator::next() } } - std::unique_ptr read_buffer = object_storage->readObject( - StoredObject(current_object_info->relative_path), - getContext()->getReadSettings(), - {}, - current_object_info->metadata->size_bytes); + std::unique_ptr read_buf; + CompressionMethod compression_method; + using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; + if (auto object_info_in_archive = dynamic_cast(current_object_info.get())) + { + compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); + auto & archive_reader = object_info_in_archive->archive_reader; + read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); + } + else + { + compression_method = chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method); + read_buf = object_storage->readObject( + StoredObject(current_object_info->relative_path), + getContext()->getReadSettings(), + {}, + current_object_info->metadata->size_bytes); + } - if (!query_settings.skip_empty_files || !read_buffer->eof()) + if (!query_settings.skip_empty_files || !read_buf->eof()) { first = false; - read_buffer = wrapReadBufferWithCompressionMethod( - std::move(read_buffer), - chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + read_buf = 
wrapReadBufferWithCompressionMethod( + std::move(read_buf), + compression_method, static_cast(getContext()->getSettingsRef().zstd_window_log_max)); - return {std::move(read_buffer), std::nullopt, format}; + return {std::move(read_buf), std::nullopt, format}; } } } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 9fcbc6a6816..00d569fea9f 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -55,6 +55,14 @@ String StorageS3Configuration::getDataSourceDescription() return std::filesystem::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; } +std::string StorageS3Configuration::getPathInArchive() const +{ + if (url.archive_pattern.has_value()) + return url.archive_pattern.value(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not an archive", getPath()); +} + void StorageS3Configuration::check(ContextPtr context) const { validateNamespace(url.bucket); diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 0bd7f1ab108..de6c02d5020 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -34,6 +34,9 @@ public: String getDataSourceDescription() override; StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; + bool isArchive() const override { return url.archive_pattern.has_value(); } + std::string getPathInArchive() const override; + void check(ContextPtr context) const override; void validateNamespace(const String & name) const override; ConfigurationPtr clone() override { return std::make_shared(*this); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index bc5b347d1e0..73e3d861cff 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -452,6 +452,16 @@ std::string StorageObjectStorage::Configuration::getPathWithoutGlobs() const return getPath().substr(0, getPath().find_first_of("*?{")); } +bool StorageObjectStorage::Configuration::isPathInArchiveWithGlobs() const +{ + return getPathInArchive().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorage::Configuration::getPathInArchive() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not archive", getPath()); +} + void StorageObjectStorage::Configuration::assertInitialized() const { if (!initialized) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 26b153ca0db..7b118cb7e6b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -175,6 +175,10 @@ public: bool isNamespaceWithGlobs() const; virtual std::string getPathWithoutGlobs() const; + virtual bool isArchive() const { return false; } + bool isPathInArchiveWithGlobs() const; + virtual std::string getPathInArchive() const; + virtual void check(ContextPtr context) const; virtual void validateNamespace(const String & /* name */) const {} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 8d5df96ca6e..56905e6c29b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -7,6 +7,7 @@ #include #include 
#include +#include #include #include #include @@ -100,10 +101,11 @@ std::shared_ptr StorageObjectStorageSourc auto settings = configuration->getQuerySettings(local_context); + std::unique_ptr iterator; if (configuration->isPathWithGlobs()) { /// Iterate through disclosed globs and make a source for each file - return std::make_shared( + iterator = std::make_unique( object_storage, configuration, predicate, virtual_columns, local_context, read_keys, settings.list_object_keys_size, settings.throw_on_zero_files_match, file_progress_callback); @@ -123,10 +125,17 @@ std::shared_ptr StorageObjectStorageSourc copy_configuration->setPaths(keys); } - return std::make_shared( + iterator = std::make_unique( object_storage, copy_configuration, virtual_columns, read_keys, settings.ignore_non_existent_file, file_progress_callback); } + + if (configuration->isArchive()) + { + return std::make_shared(object_storage, configuration, std::move(iterator), local_context, read_keys); + } + + return iterator; } void StorageObjectStorageSource::lazyInitialize(size_t processor) @@ -262,9 +271,20 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade } else { - const auto compression_method = chooseCompressionMethod(object_info->relative_path, configuration->compression_method); + CompressionMethod compression_method; const auto max_parsing_threads = need_only_count ? std::optional(1) : std::nullopt; - read_buf = createReadBuffer(object_info->relative_path, object_info->metadata->size_bytes); + + if (auto object_info_in_archive = dynamic_cast(object_info.get())) + { + compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); + auto & archive_reader = object_info_in_archive->archive_reader; + read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); + } + else + { + compression_method = chooseCompressionMethod(object_info->relative_path, configuration->compression_method); + read_buf = createReadBuffer(*object_info); + } auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, @@ -312,8 +332,10 @@ std::future StorageObjectStorageSource return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); } -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const String & key, size_t object_size) +std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) { + const auto & object_size = object_info.metadata->size_bytes; + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; /// FIXME: Changing this setting to default value breaks something around parquet reading @@ -333,7 +355,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); auto async_reader = object_storage->readObjects( - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, read_settings); + StoredObjects{StoredObject{object_info.relative_path, /* local_path */ "", object_size}}, read_settings); async_reader->setReadUntilEnd(); if (read_settings.remote_fs_prefetch) @@ -344,7 +366,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S else { /// FIXME: this is inconsistent that readObject always reads synchronously ignoring read_method setting. 
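/// [Editor's sketch — not part of the patch] The archive-vs-plain dispatch used in
/// createReader() above reduces to the following minimal pattern. Assume `object_storage`
/// and `read_settings` are in scope; `openForReadingSketch` is a hypothetical helper,
/// not a real ClickHouse function:
std::unique_ptr<ReadBuffer> openForReadingSketch(const ObjectInfoPtr & info)
{
    /// RTTI decides whether this entry lives inside an archive...
    if (const auto * in_archive = dynamic_cast<const ObjectInfoInArchive *>(info.get()))
        return in_archive->archive_reader->readFile(in_archive->path_in_archive, /*throw_on_not_found=*/true);
    /// ...otherwise it is read directly from object storage.
    return object_storage->readObject(StoredObject(info->relative_path), read_settings);
}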
- return object_storage->readObject(StoredObject(key), read_settings); + return object_storage->readObject(StoredObject(object_info.relative_path, "", object_size), read_settings); } } @@ -609,4 +631,114 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator return buffer[current_index]; } +static IArchiveReader::NameFilter createArchivePathFilter(const std::string & archive_pattern) +{ + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern)); + if (!matcher->ok()) + { + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", + archive_pattern, matcher->error()); + } + return [matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }; +} + +StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( + ObjectInfoPtr archive_object_, + const std::string & path_in_archive_, + std::shared_ptr archive_reader_) + : archive_object(archive_object_) + , path_in_archive(path_in_archive_) + , archive_reader(archive_reader_) +{ +} + +StorageObjectStorageSource::ArchiveIterator::ArchiveIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::unique_ptr archives_iterator_, + ContextPtr context_, + ObjectInfos * read_keys_) + : IIterator("ArchiveIterator") + , WithContext(context_) + , object_storage(object_storage_) + , is_path_in_archive_with_globs(configuration_->isPathInArchiveWithGlobs()) + , archives_iterator(std::move(archives_iterator_)) + , filter(is_path_in_archive_with_globs ? createArchivePathFilter(configuration_->getPathInArchive()) : IArchiveReader::NameFilter{}) + , path_in_archive(is_path_in_archive_with_globs ? "" : configuration_->getPathInArchive()) + , read_keys(read_keys_) +{ +} + +std::shared_ptr +StorageObjectStorageSource::ArchiveIterator::createArchiveReader(ObjectInfoPtr object_info) const +{ + const auto size = object_info->metadata->size_bytes; + return DB::createArchiveReader( + /* path_to_archive */object_info->relative_path, + /* archive_read_function */[=, this]() + { + StoredObject stored_object(object_info->relative_path, "", size); + return object_storage->readObject(stored_object, getContext()->getReadSettings()); + }, + /* archive_size */size); +} + +StorageObjectStorageSource::ObjectInfoPtr +StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) +{ + std::unique_lock lock{next_mutex}; + while (true) + { + if (filter) + { + if (!file_enumerator) + { + archive_object = archives_iterator->next(processor); + if (!archive_object) + return {}; + + archive_reader = createArchiveReader(archive_object); + file_enumerator = archive_reader->firstFile(); + if (!file_enumerator) + continue; + } + else if (!file_enumerator->nextFile()) + { + file_enumerator.reset(); + continue; + } + + path_in_archive = file_enumerator->getFileName(); + if (!filter(path_in_archive)) + continue; + } + else + { + archive_object = archives_iterator->next(processor); + if (!archive_object) + return {}; + + if (!archive_object->metadata) + archive_object->metadata = object_storage->getObjectMetadata(archive_object->relative_path); + + archive_reader = createArchiveReader(archive_object); + if (!archive_reader->fileExists(path_in_archive)) + continue; + } + + auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; + } +} + +size_t 
StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() +{ + return archives_iterator->estimatedKeysCount(); +} + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index abaf51edc4e..664aad56928 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -1,10 +1,11 @@ #pragma once +#include +#include +#include #include #include -#include -#include #include -#include +#include namespace DB @@ -25,6 +26,7 @@ public: class ReadTaskIterator; class GlobIterator; class KeysIterator; + class ArchiveIterator; StorageObjectStorageSource( String name_, @@ -109,7 +111,7 @@ protected: /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(size_t processor = 0); std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const String & key, size_t object_size); + std::unique_ptr createReadBuffer(const ObjectInfo & object_info); void addNumRowsToCache(const String & path, size_t num_rows); std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); @@ -218,4 +220,64 @@ private: std::atomic index = 0; bool ignore_non_existent_files; }; + +/* + * An archives iterator. + * Allows to iterate files inside one or many archives. + * `archives_iterator` is an iterator which iterates over different archives. + * There are two ways to read files in archives: + * 1. When we want to read one concrete file in each archive. + * In this case we go through all archives, check if this certain file + * exists within this archive and read it if it exists. + * 2. When we have a certain pattern of files we want to read in each archive. + * For this purpose we create a filter defined as IArchiveReader::NameFilter. + */ +class StorageObjectStorageSource::ArchiveIterator : public IIterator, private WithContext +{ +public: + explicit ArchiveIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::unique_ptr archives_iterator_, + ContextPtr context_, + ObjectInfos * read_keys_); + + size_t estimatedKeysCount() override; + + struct ObjectInfoInArchive : public ObjectInfo + { + ObjectInfoInArchive( + ObjectInfoPtr archive_object_, + const std::string & path_in_archive_, + std::shared_ptr archive_reader_); + + const ObjectInfoPtr archive_object; + const std::string path_in_archive; + const std::shared_ptr archive_reader; + }; + +private: + ObjectInfoPtr nextImpl(size_t processor) override; + std::shared_ptr createArchiveReader(ObjectInfoPtr object_info) const; + + const ObjectStoragePtr object_storage; + const bool is_path_in_archive_with_globs; + /// Iterator which iterates through different archives. + const std::unique_ptr archives_iterator; + /// Used when files inside the archive are defined with a glob. + const IArchiveReader::NameFilter filter = {}; + /// Current file inside the archive. + std::string path_in_archive = {}; + /// Read keys of files inside archives. + ObjectInfos * read_keys; + /// Object pointing to archive (NOT path within archive). + ObjectInfoPtr archive_object; + /// Reader of the archive. + std::shared_ptr archive_reader; + /// File enumerator inside the archive.
+ std::unique_ptr file_enumerator; + + std::mutex next_mutex; +}; + } From 7b81f3142ad9359d4e15f3455236f67f0c81cb14 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 11:19:10 +0200 Subject: [PATCH 332/651] Update src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index f5569d95a1b..fc5147bb56c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -781,8 +781,8 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( if (key_node_function_name == "arrayElement" && (function_name == "equals" || function_name == "notEquals")) { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map - * we return default value for arrayElement. + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in the map + * we return the default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because * that way we skip necessary granules where map key does not exist. From eb06816a715c69359e160b39fae1654c0bf85e06 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 11:19:15 +0200 Subject: [PATCH 333/651] Update src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 82a0e0f8cd1..6f46ee0c184 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -444,8 +444,8 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( if (key_function_node_function_name == "arrayElement") { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map - * we return default value for arrayElement. + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in the map + * we return the default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because * that way we skip necessary granules where map key does not exist.
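For context on the comment these patches adjust: `arrayElement` (i.e. `column_map['Key']`) returns the value type's default when the key is absent, so an equality check against the default value cannot safely prune granules through the mapKeys index. A minimal illustration of that semantics — the table and data below are hypothetical, not part of the patches:

``` sql
CREATE TABLE map_demo (m Map(String, String)) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO map_demo VALUES (map('a', 'x'));

-- The missing key yields '' (the default value for String) rather than an error,
-- so a predicate like m['missing'] = '' also matches rows where the key is absent.
SELECT m['a'], m['missing'] FROM map_demo;
```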
From b9fc57c91d61eb6ee7627e9c94f6867e7d50636b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 11:19:20 +0200 Subject: [PATCH 334/651] Update src/Storages/MergeTree/MergeTreeIndexFullText.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- src/Storages/MergeTree/MergeTreeIndexFullText.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index cea260e0a76..c5965415be5 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -477,8 +477,8 @@ bool MergeTreeConditionFullText::traverseASTEquals( if (function.getFunctionName() == "arrayElement") { /** Try to parse arrayElement for mapKeys index. - * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in map - * we return default value for arrayElement. + * It is important to ignore keys like column_map['Key'] = '' because if key does not exist in the map + * we return the default value for arrayElement. * * We cannot skip keys that does not exist in map if comparison is with default type value because * that way we skip necessary granules where map key does not exist. From bfb3fe0c230cbb00be9862c1f38e6c60e5fe6bb0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 May 2024 11:35:10 +0200 Subject: [PATCH 335/651] Fix libbcrypt for FreeBSD build Right now it fails due to [1] with the following error: /usr/work/ClickHouse/contrib/libbcrypt/crypt_blowfish/ow-crypt.h:27:14: error: conflicting types for 'crypt_r' 27 | extern char *crypt_r(__const char *key, __const char *setting, void *data); | ^ /usr/include/unistd.h:500:7: note: previous declaration is here 500 | char *crypt_r(const char *, const char *, struct crypt_data *); | ^ [1]: https://github.com/freebsd/freebsd-src/commit/5f521d7ba72145092ea23ff6081d8791ad6c1f9d Signed-off-by: Azat Khuzhin --- contrib/libbcrypt-cmake/CMakeLists.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/contrib/libbcrypt-cmake/CMakeLists.txt b/contrib/libbcrypt-cmake/CMakeLists.txt index d40d7f9195e..9e97f0af493 100644 --- a/contrib/libbcrypt-cmake/CMakeLists.txt +++ b/contrib/libbcrypt-cmake/CMakeLists.txt @@ -7,7 +7,7 @@ endif() set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libbcrypt") -set(SRCS +set(SRCS "${LIBRARY_DIR}/bcrypt.c" "${LIBRARY_DIR}/crypt_blowfish/crypt_blowfish.c" "${LIBRARY_DIR}/crypt_blowfish/crypt_gensalt.c" @@ -16,4 +16,13 @@ set(SRCS add_library(_bcrypt ${SRCS}) target_include_directories(_bcrypt SYSTEM PUBLIC "${LIBRARY_DIR}") +# Avoid conflicts for crypt_r on FreeBSD [1]: +# +# - char *crypt_r(__const char *key, __const char *setting, void *data); +# - char *crypt_r(const char *, const char *, struct crypt_data *); +# +# [1]: https://github.com/freebsd/freebsd-src/commit/5f521d7ba72145092ea23ff6081d8791ad6c1f9d +# +# NOTE: ow-crypt.h is used only internally, so PRIVATE is enough +target_compile_definitions(_bcrypt PRIVATE -D__SKIP_GNU) add_library(ch_contrib::bcrypt ALIAS _bcrypt) From f0a2b85f052e88703ce6255addabeb842a47e8fe Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 17 May 2024 11:37:51 +0200 Subject: [PATCH 336/651] Fix test query --- .../02271_fix_column_matcher_and_column_transformer.sql | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git
a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql index 245b2cc97e3..b2a04788bbb 100644 --- a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql +++ b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql @@ -61,6 +61,11 @@ CREATE TABLE github_events ) ENGINE = MergeTree ORDER BY (event_type, repo_name, created_at); -with top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) select d.repo_name, columns(count) from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name; +with + top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), + last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), + last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week 
group by repo_name order by count_last_week desc ), + last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) +select d.repo_name, columns('count') from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name; DROP TABLE github_events; From 61ee5e46ad50fcedd86f6d62d4c2bda2f6fedade Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 17 May 2024 11:43:33 +0200 Subject: [PATCH 337/651] Update the test --- .../03152_analyzer_columns_list.reference | 2 +- .../0_stateless/03152_analyzer_columns_list.sql | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.reference b/tests/queries/0_stateless/03152_analyzer_columns_list.reference index eefa8ebd513..4e9025b5baf 100644 --- a/tests/queries/0_stateless/03152_analyzer_columns_list.reference +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.reference @@ -1 +1 @@ -11323 8 +4 3 diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.sql b/tests/queries/0_stateless/03152_analyzer_columns_list.sql index 2b19cdf37a2..baed3a4ff68 100644 --- a/tests/queries/0_stateless/03152_analyzer_columns_list.sql +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.sql @@ -1,3 +1,13 @@ -SELECT COLUMNS(license_text, library_name) APPLY (length) FROM system.licenses ORDER BY library_name LIMIT 1; +CREATE TABLE test +( + foo String, + bar String, +) +ENGINE = MergeTree() +ORDER BY (foo, bar); -SELECT COLUMNS(license_text, library_name, xyz) APPLY (length) FROM system.licenses; -- { serverError UNKNOWN_IDENTIFIER } +INSERT INTO test VALUES ('foo', 'bar1'); + +SELECT COLUMNS(bar, foo) APPLY (length) FROM test; + +SELECT COLUMNS(bar, foo, xyz) APPLY (length) FROM test; -- { serverError UNKNOWN_IDENTIFIER } From 20a0e7338efd9b2918743c7ec7819b6b94816ffc Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 17 May 2024 10:18:52 +0000 Subject: [PATCH 338/651] Throw exception for invalid inputs --- src/Core/Field.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 73f0703f21e..0e5b1bac000 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes { extern const int CANNOT_RESTORE_FROM_FIELD_DUMP; extern const int DECIMAL_OVERFLOW; + extern const int INCORRECT_DATA; } template @@ -28,7 +29,7 @@ T DecimalField::getScaleMultiplier() const return DecimalUtils::scaleMultiplier(scale); } -inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) +Field getBinaryValue(UInt8 type, ReadBuffer & buf) { switch (static_cast(type)) { @@ -146,7 +147,7 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) case Field::Types::CustomType: return Field(); } - UNREACHABLE(); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown field type {}", std::to_string(type)); } void readBinary(Array & x, ReadBuffer & buf) @@ -575,7 +576,7 @@ template bool decimalLessOrEqual(Decimal256 x, Decimal256 y, UInt32 template bool decimalLessOrEqual(DateTime64 x, DateTime64 y, UInt32 x_scale, UInt32 y_scale); -inline void writeText(const Null & x, WriteBuffer & buf) +void writeText(const Null & x, WriteBuffer & buf) { if (x.isNegativeInfinity()) writeText("-Inf", buf); From 
d3c9b5f52194f58cf06d829bceb0bb4e3cbe3c62 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 17 May 2024 13:08:50 +0200 Subject: [PATCH 339/651] Fix error codes --- src/Interpreters/executeQuery.cpp | 1 - src/Processors/QueryPlan/Optimizations/optimizeTree.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 67797f7d4f6..e603e2624aa 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -103,7 +103,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; extern const int QUERY_WAS_CANCELLED; - extern const int INCORRECT_DATA; extern const int SYNTAX_ERROR; extern const int SUPPORT_IS_DISABLED; extern const int INCORRECT_QUERY; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 4f512016c6b..df9e095af30 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -12,6 +12,7 @@ namespace DB namespace ErrorCodes { + extern const int INCORRECT_DATA; extern const int TOO_MANY_QUERY_PLAN_OPTIMIZATIONS; extern const int PROJECTION_NOT_USED; } From 782cbcfa58a5cfae54731fd955c19d8f4c516857 Mon Sep 17 00:00:00 2001 From: Pazitiff9 <99555202+Pazitiff9@users.noreply.github.com> Date: Fri, 17 May 2024 14:19:14 +0300 Subject: [PATCH 340/651] Correction of a typo in table.md Correction of a typo in ru docs about Create Table --- docs/ru/sql-reference/statements/create/table.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index dbd6a325c40..a03ff7b1628 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -243,7 +243,7 @@ ClickHouse поддерживает кодеки общего назначени - `Delta(delta_bytes)` — Метод, в котором исходные значения заменяются разностью двух соседних значений, за исключением первого значения, которое остаётся неизменным. Для хранения разниц используется до `delta_bytes`, т.е. `delta_bytes` — это максимальный размер исходных данных. Возможные значения `delta_bytes`: 1, 2, 4, 8. Значение по умолчанию для `delta_bytes` равно `sizeof(type)`, если результат 1, 2, 4, or 8. Во всех других случаях — 1. - `DoubleDelta` — Вычисляется разницу от разниц и сохраняет её в компакном бинарном виде. Оптимальная степень сжатия достигается для монотонных последовательностей с постоянным шагом, наподобие временных рядов. Можно использовать с любым типом данных фиксированного размера. Реализует алгоритм, используемый в TSDB Gorilla, поддерживает 64-битные типы данных. Использует 1 дополнительный бит для 32-байтовых значений: 5-битные префиксы вместо 4-битных префиксов. Подробнее читайте в разделе «Compressing Time Stamps» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `GCD` - Вычисляет НОД всех чисел, а затем делит их на него. Этот кодек предназначен для подготовки данных и не подходит для использования без дополнительного кодека. GCD-кодек может использоваться с Integer, Decimal и DateTime. Хорошим вариантом использования было бы хранение временных меток или денежных значений с высокой точностью. -- `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. 
Еффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе «Compressing Values» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +- `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Эффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе «Compressing Values» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `T64` — Метод сжатия который обрезает неиспользуемые старшие биты целочисленных значений (включая `Enum`, `Date` и `DateTime`). На каждом шаге алгоритма, кодек помещает блок из 64 значений в матрицу 64✕64, транспонирует её, обрезает неиспользуемые биты, а то, что осталось возвращает в виде последовательности. Неиспользуемые биты, это биты, которые не изменяются от минимального к максимальному на всём диапазоне значений куска данных. Кодеки `DoubleDelta` и `Gorilla` используются в TSDB Gorilla как компоненты алгоритма сжатия. Подход Gorilla эффективен в сценариях, когда данные представляют собой медленно изменяющиеся во времени величины. Метки времени эффективно сжимаются кодеком `DoubleDelta`, а значения кодеком `Gorilla`. Например, чтобы создать эффективно хранящуюся таблицу, используйте следующую конфигурацию: From 53e992af4ff6c2df33f46c597498baa38c327ee3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 17 May 2024 11:42:28 +0000 Subject: [PATCH 341/651] Remove some unnecessary UNREACHABLEs --- programs/keeper-client/Commands.cpp | 3 ++- programs/main.cpp | 2 +- src/Access/AccessEntityIO.cpp | 3 +-- src/Access/AccessRights.cpp | 1 - src/Access/IAccessStorage.cpp | 9 +++------ .../AggregateFunctionGroupArray.cpp | 13 ++++++------- .../AggregateFunctionSequenceNextNode.cpp | 1 - src/AggregateFunctions/AggregateFunctionSum.h | 13 ++++++------- src/Common/DateLUTImpl.cpp | 1 - src/Common/IntervalKind.cpp | 10 ---------- src/Common/TargetSpecific.cpp | 2 -- src/Common/ThreadProfileEvents.cpp | 1 - src/Common/ZooKeeper/IKeeper.cpp | 2 -- src/Compression/CompressionCodecDeflateQpl.cpp | 1 - src/Compression/CompressionCodecDoubleDelta.cpp | 3 +-- src/Coordination/KeeperReconfiguration.cpp | 8 +++++++- src/Coordination/KeeperServer.cpp | 3 ++- src/Core/Field.cpp | 1 - src/Core/Field.h | 2 -- src/DataTypes/Serializations/ISerialization.cpp | 1 - src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 1 - .../MetadataStorageTransactionState.cpp | 1 - src/Disks/VolumeJBOD.cpp | 2 -- src/Formats/EscapingRuleUtils.cpp | 1 - src/Functions/FunctionsRound.h | 4 ---- src/Functions/PolygonUtils.h | 2 -- .../UserDefinedSQLObjectsZooKeeperStorage.cpp | 1 - src/IO/CompressionMethod.cpp | 1 - src/IO/HadoopSnappyReadBuffer.h | 1 - src/Interpreters/AggregatedDataVariants.cpp | 8 -------- src/Interpreters/Cache/FileSegment.cpp | 1 - src/Interpreters/ComparisonGraph.cpp | 1 - src/Interpreters/FilesystemCacheLog.cpp | 1 - src/Interpreters/HashJoin.cpp | 3 --- .../InterpreterTransactionControlQuery.cpp | 1 - src/Interpreters/SetVariants.cpp | 4 ---- 
src/Parsers/ASTExplainQuery.h | 2 -- src/Parsers/Lexer.cpp | 4 ---- .../Formats/Impl/MsgPackRowInputFormat.cpp | 1 - src/Processors/IProcessor.cpp | 2 -- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 ------ src/Processors/QueryPlan/TotalsHavingStep.cpp | 2 -- src/Processors/Transforms/FillingTransform.cpp | 1 - .../Transforms/buildPushingToViewsChain.cpp | 2 -- src/Storages/MergeTree/BackgroundJobsAssignee.cpp | 1 - src/Storages/MergeTree/KeyCondition.cpp | 2 -- src/Storages/MergeTree/MergeTreeData.cpp | 2 -- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 -- src/Storages/WindowView/StorageWindowView.cpp | 1 - 49 files changed, 29 insertions(+), 112 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index ec5eaf5070c..38c3d4356f6 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -11,6 +11,7 @@ namespace DB namespace ErrorCodes { extern const int KEEPER_EXCEPTION; + extern const int UNEXPECTED_ZOOKEEPER_ERROR; } bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const @@ -441,7 +442,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient new_members = query->args[1].safeGet(); break; default: - UNREACHABLE(); + throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected operation: {}", operation); } auto response = client->zookeeper->reconfig(joining, leaving, new_members); diff --git a/programs/main.cpp b/programs/main.cpp index 4bb73399719..48985ea683f 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -155,8 +155,8 @@ auto instructionFailToString(InstructionFail fail) ret("AVX2"); case InstructionFail::AVX512: ret("AVX512"); +#undef ret } - UNREACHABLE(); } diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index b0dfd74c53b..1b073329296 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -144,8 +144,7 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String catch (Exception & e) { e.addMessage("Could not parse " + file_path); - e.rethrow(); - UNREACHABLE(); + throw; } } diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index c10931f554c..dd25d3e4ac0 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -258,7 +258,6 @@ namespace case TABLE_LEVEL: return AccessFlags::allFlagsGrantableOnTableLevel(); case COLUMN_LEVEL: return AccessFlags::allFlagsGrantableOnColumnLevel(); } - UNREACHABLE(); } } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8e51481e415..8d4e7d3073e 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -257,8 +257,7 @@ std::vector IAccessStorage::insert(const std::vector & mu } e.addMessage("After successfully inserting {}/{}: {}", successfully_inserted.size(), multiple_entities.size(), successfully_inserted_str); } - e.rethrow(); - UNREACHABLE(); + throw; } } @@ -361,8 +360,7 @@ std::vector IAccessStorage::remove(const std::vector & ids, bool thr } e.addMessage("After successfully removing {}/{}: {}", removed_names.size(), ids.size(), removed_names_str); } - e.rethrow(); - UNREACHABLE(); + throw; } } @@ -458,8 +456,7 @@ std::vector IAccessStorage::update(const std::vector & ids, const Up } e.addMessage("After successfully updating {}/{}: {}", names_of_updated.size(), ids.size(), names_of_updated_str); } - e.rethrow(); - UNREACHABLE(); + throw; } } diff --git 
a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index d4fb7afcb78..930b2c6ce73 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -60,14 +60,13 @@ struct GroupArrayTrait template constexpr const char * getNameByTrait() { - if (Trait::last) + if constexpr (Trait::last) return "groupArrayLast"; - if (Trait::sampler == Sampler::NONE) - return "groupArray"; - else if (Trait::sampler == Sampler::RNG) - return "groupArraySample"; - - UNREACHABLE(); + switch (Trait::sampler) + { + case Sampler::NONE: return "groupArray"; + case Sampler::RNG: return "groupArraySample"; + } } template diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp index bed10333af0..a9dd53a75e8 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp @@ -414,7 +414,6 @@ public: break; return (i == events_size) ? base - i : unmatched_idx; } - UNREACHABLE(); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 58aaddf357a..2f23187d2ea 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -457,13 +457,12 @@ public: String getName() const override { - if constexpr (Type == AggregateFunctionTypeSum) - return "sum"; - else if constexpr (Type == AggregateFunctionTypeSumWithOverflow) - return "sumWithOverflow"; - else if constexpr (Type == AggregateFunctionTypeSumKahan) - return "sumKahan"; - UNREACHABLE(); + switch (Type) + { + case AggregateFunctionTypeSum: return "sum"; + case AggregateFunctionTypeSumWithOverflow: return "sumWithOverflow"; + case AggregateFunctionTypeSumKahan: return "sumKahan"; + } } explicit AggregateFunctionSum(const DataTypes & argument_types_) diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index 392ee64dcbf..c87d44a4b95 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -41,7 +41,6 @@ UInt8 getDayOfWeek(const cctz::civil_day & date) case cctz::weekday::saturday: return 6; case cctz::weekday::sunday: return 7; } - UNREACHABLE(); } inline cctz::time_point lookupTz(const cctz::time_zone & cctz_time_zone, const cctz::civil_day & date) diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 22c7db504c3..1548d5cf9a5 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -34,8 +34,6 @@ Int64 IntervalKind::toAvgNanoseconds() const default: return toAvgSeconds() * NANOSECONDS_PER_SECOND; } - - UNREACHABLE(); } Int32 IntervalKind::toAvgSeconds() const @@ -54,7 +52,6 @@ Int32 IntervalKind::toAvgSeconds() const case IntervalKind::Kind::Quarter: return 7889238; /// Exactly 1/4 of a year. 
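// [Editor's note — not part of the patch] The idea behind removing these
// UNREACHABLE() calls: when a switch covers every enumerator of an enum class
// and every case returns, no trailing statement is needed — under Clang,
// -Wswitch flags any newly added enumerator that is left unhandled. A minimal
// sketch with a hypothetical enum:
//
//     enum class SketchKind { A, B };
//
//     const char * sketchName(SketchKind kind)
//     {
//         switch (kind)
//         {
//             case SketchKind::A: return "A";
//             case SketchKind::B: return "B";
//         }
//         // No default and no UNREACHABLE(): adding a SketchKind value
//         // without a case triggers a compile-time -Wswitch warning.
//     }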
case IntervalKind::Kind::Year: return 31556952; /// The average length of a Gregorian year is equal to 365.2425 days } - UNREACHABLE(); } Float64 IntervalKind::toSeconds() const @@ -80,7 +77,6 @@ Float64 IntervalKind::toSeconds() const default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not possible to get precise number of seconds in non-precise interval"); } - UNREACHABLE(); } bool IntervalKind::isFixedLength() const @@ -99,7 +95,6 @@ bool IntervalKind::isFixedLength() const case IntervalKind::Kind::Quarter: case IntervalKind::Kind::Year: return false; } - UNREACHABLE(); } IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds) @@ -141,7 +136,6 @@ const char * IntervalKind::toKeyword() const case IntervalKind::Kind::Quarter: return "QUARTER"; case IntervalKind::Kind::Year: return "YEAR"; } - UNREACHABLE(); } @@ -161,7 +155,6 @@ const char * IntervalKind::toLowercasedKeyword() const case IntervalKind::Kind::Quarter: return "quarter"; case IntervalKind::Kind::Year: return "year"; } - UNREACHABLE(); } @@ -192,7 +185,6 @@ const char * IntervalKind::toDateDiffUnit() const case IntervalKind::Kind::Year: return "year"; } - UNREACHABLE(); } @@ -223,7 +215,6 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const case IntervalKind::Kind::Year: return "toIntervalYear"; } - UNREACHABLE(); } @@ -257,7 +248,6 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const case IntervalKind::Kind::Year: return "toYear"; } - UNREACHABLE(); } diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index 49f396c0926..8540c9a9986 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -54,8 +54,6 @@ String toString(TargetArch arch) case TargetArch::AMXTILE: return "amxtile"; case TargetArch::AMXINT8: return "amxint8"; } - - UNREACHABLE(); } } diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 6a63d484cd9..23b41f23bde 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -75,7 +75,6 @@ const char * TasksStatsCounters::metricsProviderString(MetricsProvider provider) case MetricsProvider::Netlink: return "netlink"; } - UNREACHABLE(); } bool TasksStatsCounters::checkIfAvailable() diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 7d2602bde1e..7cca262baca 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -146,8 +146,6 @@ const char * errorMessage(Error code) case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; case Error::ZNOTREADONLY: return "State-changing request is passed to read-only server"; } - - UNREACHABLE(); } bool isHardwareError(Error zk_return_code) diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index 7e0653c69f8..f1b5b24e866 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -466,7 +466,6 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); return; } - UNREACHABLE(); } void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests() diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index e6e8db4c699..78fdf5c627a 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -163,9 +163,8 
@@ inline Int64 getMaxValueForByteSize(Int8 byte_size) case sizeof(UInt64): return std::numeric_limits::max(); default: - assert(false && "only 1, 2, 4 and 8 data sizes are supported"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "only 1, 2, 4 and 8 data sizes are supported"); } - UNREACHABLE(); } struct WriteSpec diff --git a/src/Coordination/KeeperReconfiguration.cpp b/src/Coordination/KeeperReconfiguration.cpp index e3642913a7a..a2a06f92283 100644 --- a/src/Coordination/KeeperReconfiguration.cpp +++ b/src/Coordination/KeeperReconfiguration.cpp @@ -5,6 +5,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int UNEXPECTED_ZOOKEEPER_ERROR; +} + ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining) { ClusterUpdateActions out; @@ -79,7 +85,7 @@ String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateA new_config.emplace_back(RaftServerConfig{*cfg->get_server(priority->id)}); } else - UNREACHABLE(); + throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected update"); } for (const auto & item : cfg->get_servers()) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 8d21ce2ab01..b132c898be6 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -45,6 +45,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int LOGICAL_ERROR; extern const int INVALID_CONFIG_PARAMETER; + extern const int UNEXPECTED_ZOOKEEPER_ERROR; } using namespace std::chrono_literals; @@ -990,7 +991,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return Accepted; } - UNREACHABLE(); + throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected action"); } ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 73f0703f21e..7207485c799 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -146,7 +146,6 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) case Field::Types::CustomType: return Field(); } - UNREACHABLE(); } void readBinary(Array & x, ReadBuffer & buf) diff --git a/src/Core/Field.h b/src/Core/Field.h index 4424d669c4d..710614cd0a0 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -667,8 +667,6 @@ public: case Types::AggregateFunctionState: return f(field.template get()); case Types::CustomType: return f(field.template get()); } - - UNREACHABLE(); } String dump() const; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index a3a28f8091c..cd605c93f0d 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -36,7 +36,6 @@ String ISerialization::kindToString(Kind kind) case Kind::SPARSE: return "Sparse"; } - UNREACHABLE(); } ISerialization::Kind ISerialization::stringToKind(const String & str) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 3433698a162..cb34f7932c3 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -140,7 +140,6 @@ private: case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: return "REMOTE_FS_READ_AND_PUT_IN_CACHE"; } - UNREACHABLE(); } size_t first_offset = 0; diff --git 
a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp index 245578b5d9e..a37f4ce7e65 100644 --- a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp @@ -17,7 +17,6 @@ std::string toString(MetadataStorageTransactionState state) case MetadataStorageTransactionState::PARTIALLY_ROLLED_BACK: return "PARTIALLY_ROLLED_BACK"; } - UNREACHABLE(); } } diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index a0c71583a22..e796ad6cdd7 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -112,7 +112,6 @@ DiskPtr VolumeJBOD::getDisk(size_t /* index */) const return disks_by_size.top().disk; } } - UNREACHABLE(); } ReservationPtr VolumeJBOD::reserve(UInt64 bytes) @@ -164,7 +163,6 @@ ReservationPtr VolumeJBOD::reserve(UInt64 bytes) return reservation; } } - UNREACHABLE(); } bool VolumeJBOD::areMergesAvoided() const diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 3edade639df..2fe29d8bebb 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -62,7 +62,6 @@ String escapingRuleToString(FormatSettings::EscapingRule escaping_rule) case FormatSettings::EscapingRule::Raw: return "Raw"; } - UNREACHABLE(); } void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 99f3a14dfec..233d4058f11 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -149,8 +149,6 @@ struct IntegerRoundingComputation return x; } } - - UNREACHABLE(); } static ALWAYS_INLINE T compute(T x, T scale) @@ -163,8 +161,6 @@ struct IntegerRoundingComputation case ScaleMode::Negative: return computeImpl(x, scale); } - - UNREACHABLE(); } static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) requires std::integral diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index c4851718da6..57f1243537d 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -381,8 +381,6 @@ bool PointInPolygonWithGrid::contains(CoordinateType x, Coordina case CellType::complexPolygon: return boost::geometry::within(Point(x, y), polygons[cell.index_of_inner_polygon]); } - - UNREACHABLE(); } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index 568e0b9b5d2..766d63eafb0 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -35,7 +35,6 @@ namespace case UserDefinedSQLObjectType::Function: return "function_"; } - UNREACHABLE(); } constexpr std::string_view sql_extension = ".sql"; diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index b8e1134d422..22913125e99 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -52,7 +52,6 @@ std::string toContentEncodingName(CompressionMethod method) case CompressionMethod::None: return ""; } - UNREACHABLE(); } CompressionMethod chooseHTTPCompressionMethod(const std::string & list) diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index 73e52f2c503..bbbb84dd6dd 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ 
-88,7 +88,6 @@ public: case Status::TOO_LARGE_COMPRESSED_BLOCK: return "TOO_LARGE_COMPRESSED_BLOCK"; } - UNREACHABLE(); } explicit HadoopSnappyReadBuffer( diff --git a/src/Interpreters/AggregatedDataVariants.cpp b/src/Interpreters/AggregatedDataVariants.cpp index 87cfdda5948..8f82f15248f 100644 --- a/src/Interpreters/AggregatedDataVariants.cpp +++ b/src/Interpreters/AggregatedDataVariants.cpp @@ -117,8 +117,6 @@ size_t AggregatedDataVariants::size() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t AggregatedDataVariants::sizeWithoutOverflowRow() const @@ -136,8 +134,6 @@ size_t AggregatedDataVariants::sizeWithoutOverflowRow() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } const char * AggregatedDataVariants::getMethodName() const @@ -155,8 +151,6 @@ const char * AggregatedDataVariants::getMethodName() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } bool AggregatedDataVariants::isTwoLevel() const @@ -174,8 +168,6 @@ bool AggregatedDataVariants::isTwoLevel() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } bool AggregatedDataVariants::isConvertibleToTwoLevel() const diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 9459029dc4c..61a356fa3c3 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -799,7 +799,6 @@ String FileSegment::stateToString(FileSegment::State state) case FileSegment::State::DETACHED: return "DETACHED"; } - UNREACHABLE(); } bool FileSegment::assertCorrectness() const diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index 4eacbae7a30..d53ff4b0227 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -309,7 +309,6 @@ ComparisonGraphCompareResult ComparisonGraph::pathToCompareResult(Path pat case Path::GREATER: return inverse ? ComparisonGraphCompareResult::LESS : ComparisonGraphCompareResult::GREATER; case Path::GREATER_OR_EQUAL: return inverse ? 
ComparisonGraphCompareResult::LESS_OR_EQUAL : ComparisonGraphCompareResult::GREATER_OR_EQUAL; } - UNREACHABLE(); } template diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 80fe1c3a8ef..aa489351a98 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -26,7 +26,6 @@ static String typeToString(FilesystemCacheLogElement::CacheType type) case FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE: return "WRITE_THROUGH_CACHE"; } - UNREACHABLE(); } ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 3a21c13db5e..75da8bbc3e7 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -705,7 +705,6 @@ namespace APPLY_FOR_JOIN_VARIANTS(M) #undef M } - UNREACHABLE(); } } @@ -2641,8 +2640,6 @@ private: default: throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type); } - - UNREACHABLE(); } template diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index d31ace758c4..13872fbe3f5 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -33,7 +33,6 @@ BlockIO InterpreterTransactionControlQuery::execute() case ASTTransactionControl::SET_SNAPSHOT: return executeSetSnapshot(session_context, tcl.snapshot); } - UNREACHABLE(); } BlockIO InterpreterTransactionControlQuery::executeBegin(ContextMutablePtr session_context) diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp index 64796a013f1..c600d096160 100644 --- a/src/Interpreters/SetVariants.cpp +++ b/src/Interpreters/SetVariants.cpp @@ -41,8 +41,6 @@ size_t SetVariantsTemplate::getTotalRowCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } - - UNREACHABLE(); } template @@ -57,8 +55,6 @@ size_t SetVariantsTemplate::getTotalByteCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } - - UNREACHABLE(); } template diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index 701bde8cebd..eb095b5dbbc 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -40,8 +40,6 @@ public: case TableOverride: return "EXPLAIN TABLE OVERRIDE"; case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION"; } - - UNREACHABLE(); } static ExplainKind fromString(const String & str) diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 9ac6e623803..30717550713 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -41,8 +41,6 @@ Token quotedString(const char *& pos, const char * const token_begin, const char ++pos; continue; } - - UNREACHABLE(); } } @@ -538,8 +536,6 @@ const char * getTokenName(TokenType type) APPLY_FOR_TOKENS(M) #undef M } - - UNREACHABLE(); } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 98cbdeaaa4b..6b7f1f5206c 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -657,7 +657,6 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type()); } } - UNREACHABLE(); } std::optional MsgPackSchemaReader::readRowAndGetDataTypes() diff --git a/src/Processors/IProcessor.cpp 
b/src/Processors/IProcessor.cpp index 8b160153733..5ab5e5277aa 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -36,8 +36,6 @@ std::string IProcessor::statusToName(Status status) case Status::ExpandPipeline: return "ExpandPipeline"; } - - UNREACHABLE(); } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e523a2c243c..2f7927681aa 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1136,8 +1136,6 @@ static void addMergingFinal( return std::make_shared(header, num_outputs, sort_description, max_block_size_rows, /*max_block_size_bytes=*/0, merging_params.graphite_params, now); } - - UNREACHABLE(); }; pipe.addTransform(get_merging_processor()); @@ -2143,8 +2141,6 @@ static const char * indexTypeToString(ReadFromMergeTree::IndexType type) case ReadFromMergeTree::IndexType::Skip: return "Skip"; } - - UNREACHABLE(); } static const char * readTypeToString(ReadFromMergeTree::ReadType type) @@ -2160,8 +2156,6 @@ static const char * readTypeToString(ReadFromMergeTree::ReadType type) case ReadFromMergeTree::ReadType::ParallelReplicas: return "Parallel"; } - - UNREACHABLE(); } void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index d1bd70fd0b2..ac5e144bf4a 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -86,8 +86,6 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr case TotalsMode::AFTER_HAVING_AUTO: return "after_having_auto threshold " + std::to_string(auto_include_threshold); } - - UNREACHABLE(); } void TotalsHavingStep::describeActions(FormatSettings & settings) const diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 05fd2a7254f..bb38c3e1dc5 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -67,7 +67,6 @@ static FillColumnDescription::StepFunction getStepFunction( FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE } - UNREACHABLE(); } static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & type) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5e8ecdca95e..20977b801d3 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -897,8 +897,6 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St { return std::current_exception(); } - - UNREACHABLE(); } void FinalizingViewsTransform::work() diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp index 56a4378cf9a..0a69bf1109f 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp @@ -93,7 +93,6 @@ String BackgroundJobsAssignee::toString(Type type) case Type::Moving: return "Moving"; } - UNREACHABLE(); } void BackgroundJobsAssignee::start() diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 849240502e4..dbc98404569 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -2957,8 +2957,6 @@ String 
KeyCondition::RPNElement::toString(std::string_view column_name, bool pri case ALWAYS_TRUE: return "true"; } - - UNREACHABLE(); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f448a9a820d..6b6adf56cd2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1175,8 +1175,6 @@ String MergeTreeData::MergingParams::getModeName() const case Graphite: return "Graphite"; case VersionedCollapsing: return "VersionedCollapsing"; } - - UNREACHABLE(); } Int64 MergeTreeData::getMaxBlockNumber() const diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index daa163d741c..395d27558f3 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -360,8 +360,6 @@ Block MergeTreeDataWriter::mergeBlock( return std::make_shared( block, 1, sort_description, block_size + 1, /*block_size_bytes=*/0, merging_params.graphite_params, time(nullptr)); } - - UNREACHABLE(); }; auto merging_algorithm = get_merging_algorithm(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a9ec1f6c694..4e11787cecf 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -297,7 +297,6 @@ namespace CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } class AddingAggregatedChunkInfoTransform : public ISimpleTransform From ff3dbe97573b1b1e877f9b521bcaa1e839ae8288 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 17 May 2024 13:57:27 +0200 Subject: [PATCH 342/651] empty commit From e4ba32a831929505ef01b65c51ec949d3df513b2 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 17 May 2024 14:09:08 +0200 Subject: [PATCH 343/651] Try #2 after removing fast tests flag From 07349ec50841058ee657bc2230252c4ef7504c22 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 17 May 2024 14:22:39 +0200 Subject: [PATCH 344/651] Fix formatting issues in `in.md` --- docs/en/sql-reference/operators/in.md | 51 ++++++++++++++------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index bfad16f8365..74eebb2538c 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -18,7 +18,7 @@ If the left side is a single column that is in the index, and the right side is Don’t list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section [External data for query processing](../../engines/table-engines/special/external-data.md)), then use a subquery. -The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or SELECT subquery in brackets. +The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or `SELECT` subquery in brackets. ClickHouse allows types to differ in the left and the right parts of `IN` subquery. 
In this case it converts the left side value to the type of the right side, as if the [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null) function is applied. That means, that the data type becomes [Nullable](../../sql-reference/data-types/nullable.md), and if the conversion cannot be performed, it returns [NULL](../../sql-reference/syntax.md#null-literal). @@ -43,15 +43,16 @@ If the right side of the operator is the name of a table (for example, `UserID I If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query. The subquery may specify more than one column for filtering tuples. + Example: ``` sql SELECT (CounterID, UserID) IN (SELECT CounterID, UserID FROM ...) FROM ... ``` -The columns to the left and right of the IN operator should have the same type. +The columns to the left and right of the `IN` operator should have the same type. -The IN operator and subquery may occur in any part of the query, including in aggregate functions and lambda functions. +The `IN` operator and subquery may occur in any part of the query, including in aggregate functions and lambda functions. Example: ``` sql @@ -81,7 +82,7 @@ ORDER BY EventDate ASC ``` For each day after March 17th, count the percentage of pageviews made by users who visited the site on March 17th. -A subquery in the IN clause is always run just one time on a single server. There are no dependent subqueries. +A subquery in the `IN` clause is always run just one time on a single server. There are no dependent subqueries. ## NULL Processing @@ -120,13 +121,13 @@ FROM t_null ## Distributed Subqueries -There are two options for IN-s with subqueries (similar to JOINs): normal `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are run for distributed query processing. +There are two options for `IN` operators with subqueries (similar to `JOIN` operators): normal `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are run for distributed query processing. :::note Remember that the algorithms described below may work differently depending on the [settings](../../operations/settings/settings.md) `distributed_product_mode` setting. ::: -When using the regular IN, the query is sent to remote servers, and each of them runs the subqueries in the `IN` or `JOIN` clause. +When using the regular `IN`, the query is sent to remote servers, and each of them runs the subqueries in the `IN` or `JOIN` clause. When using `GLOBAL IN` / `GLOBAL JOINs`, first all the subqueries are run for `GLOBAL IN` / `GLOBAL JOINs`, and the results are collected in temporary tables. Then the temporary tables are sent to each remote server, where the queries are run using this temporary data. @@ -152,7 +153,7 @@ SELECT uniq(UserID) FROM local_table and run on each of them in parallel, until it reaches the stage where intermediate results can be combined. Then the intermediate results will be returned to the requestor server and merged on it, and the final result will be sent to the client. 
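To make the `distributed_table` / `local_table` pairing in the example above concrete, here is a minimal sketch of how such a pair could be declared. The cluster name `test_cluster`, the column types and the sharding expression are illustrative assumptions, not part of the original documentation:

``` sql
-- Each shard keeps its own rows in local_table.
CREATE TABLE local_table (CounterID UInt32, UserID UInt64, EventDate Date)
ENGINE = MergeTree
ORDER BY (CounterID, EventDate);

-- distributed_table stores no data itself; it fans queries out to the
-- local_table instances on every shard of test_cluster.
CREATE TABLE distributed_table AS local_table
ENGINE = Distributed(test_cluster, currentDatabase(), local_table, rand());
```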
-Now let’s examine a query with IN: +Now let’s examine a query with `IN`: ``` sql SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) @@ -166,7 +167,7 @@ This query will be sent to all remote servers as SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) ``` -In other words, the data set in the IN clause will be collected on each server independently, only across the data that is stored locally on each of the servers. +In other words, the data set in the `IN` clause will be collected on each server independently, only across the data that is stored locally on each of the servers. This will work correctly and optimally if you are prepared for this case and have spread data across the cluster servers such that the data for a single UserID resides entirely on a single server. In this case, all the necessary data will be available locally on each server. Otherwise, the result will be inaccurate. We refer to this variation of the query as “local IN”. @@ -182,7 +183,7 @@ This query will be sent to all remote servers as SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) ``` -The subquery will begin running on each remote server. Since the subquery uses a distributed table, the subquery that is on each remote server will be resent to every remote server as +The subquery will begin running on each remote server. Since the subquery uses a distributed table, the subquery that is on each remote server will be resent to every remote server as: ``` sql SELECT UserID FROM local_table WHERE CounterID = 34 @@ -190,33 +191,33 @@ SELECT UserID FROM local_table WHERE CounterID = 34 For example, if you have a cluster of 100 servers, executing the entire query will require 10,000 elementary requests, which is generally considered unacceptable. -In such cases, you should always use GLOBAL IN instead of IN. Let’s look at how it works for the query +In such cases, you should always use `GLOBAL IN` instead of `IN`. Let’s look at how it works for the query: ``` sql SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID GLOBAL IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) ``` -The requestor server will run the subquery +The requestor server will run the subquery: ``` sql SELECT UserID FROM distributed_table WHERE CounterID = 34 ``` -and the result will be put in a temporary table in RAM. Then the request will be sent to each remote server as +and the result will be put in a temporary table in RAM. Then the request will be sent to each remote server as: ``` sql SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 ``` -and the temporary table `_data1` will be sent to every remote server with the query (the name of the temporary table is implementation-defined). +The temporary table `_data1` will be sent to every remote server with the query (the name of the temporary table is implementation-defined). -This is more optimal than using the normal IN. However, keep the following points in mind: +This is more optimal than using the normal `IN`. However, keep the following points in mind: -1. When creating a temporary table, data is not made unique. To reduce the volume of data transmitted over the network, specify DISTINCT in the subquery. (You do not need to do this for a normal IN.) -2. 
The temporary table will be sent to all the remote servers. Transmission does not account for network topology. For example, if 10 remote servers reside in a datacenter that is very remote in relation to the requestor server, the data will be sent 10 times over the channel to the remote datacenter. Try to avoid large data sets when using GLOBAL IN.
+2. The temporary table will be sent to all the remote servers. Transmission does not account for network topology. For example, if 10 remote servers reside in a datacenter that is very remote in relation to the requestor server, the data will be sent 10 times over the channel to the remote datacenter. Try to avoid large data sets when using `GLOBAL IN`.
3. When transmitting data to remote servers, restrictions on network bandwidth are not configurable. You might overload the network.
-4. Try to distribute data across servers so that you do not need to use GLOBAL IN on a regular basis.
-5. If you need to use GLOBAL IN often, plan the location of the ClickHouse cluster so that a single group of replicas resides in no more than one data center with a fast network between them, so that a query can be processed entirely within a single data center.
+4. Try to distribute data across servers so that you do not need to use `GLOBAL IN` on a regular basis.
+5. If you need to use `GLOBAL IN` often, plan the location of the ClickHouse cluster so that a single group of replicas resides in no more than one data center with a fast network between them, so that a query can be processed entirely within a single data center.

It also makes sense to specify a local table in the `GLOBAL IN` clause, in case this local table is only available on the requestor server and you want to use data from it on remote servers.

@@ -224,36 +225,38 @@ It also makes sense to specify a local table in the `GLOBAL IN` clause, in case
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries.

-This is specially important if the `global in` query returns a large amount of data. Consider the following sql -
+This is especially important if the `GLOBAL IN` query returns a large amount of data. Consider the following SQL:
+
```sql
select * from table1 where col1 global in (select col1 from table2 where <some_predicate>)
```

-If `some_predicate` is not selective enough, it will return large amount of data and cause performance issues. In such cases, it is wise to limit the data transfer over the network. Also, note that [`set_overflow_mode`](../../operations/settings/query-complexity.md#set_overflow_mode) is set to `throw` (by default) meaning that an exception is raised when these thresholds are met.
+If `some_predicate` is not selective enough, it will return a large amount of data and cause performance issues. In such cases, it is wise to limit the data transfer over the network. Also, note that [`set_overflow_mode`](../../operations/settings/query-complexity.md#set_overflow_mode) is set to `throw` (by default) meaning that an exception is raised when these thresholds are met.
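As an illustration, these limits can be applied per query through the `SETTINGS` clause; the threshold values below are arbitrary placeholders, and `<some_predicate>` still stands for whatever filter the subquery uses:

```sql
-- With set_overflow_mode = 'break' the set is truncated at the limit instead
-- of throwing, so the result may be computed from an incomplete set.
SELECT *
FROM table1
WHERE col1 GLOBAL IN (SELECT col1 FROM table2 WHERE <some_predicate>)
SETTINGS max_rows_in_set = 10000000, max_bytes_in_set = 1000000000, set_overflow_mode = 'break'
```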
### Distributed Subqueries and max_parallel_replicas

When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed. For example, the following:
+
```sql
SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
SETTINGS max_parallel_replicas=3
```
-is transformed on each server into
+is transformed on each server into:
```sql
SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M
```
-where M is between 1 and 3 depending on which replica the local query is executing on.
+where `M` is between `1` and `3` depending on which replica the local query is executing on.
These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
-Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
+Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`.
-One workaround if local_table_2 does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`.
+One workaround if `local_table_2` does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`.
If a table doesn't have a sampling key, more flexible options for [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) can be used that can produce different and more optimal behaviour.
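For readers who want to see what "sampled by UserID or a subkey of it" looks like in a table definition, here is a minimal sketch; the schema is an illustrative assumption rather than something taken from the original text:

```sql
-- The SAMPLE BY expression must be part of the primary key, which is why
-- intHash32(UserID) also appears in ORDER BY. With this sampling key the
-- implicit SAMPLE 1/3 OFFSET (M-1)/3 rewrite splits rows disjointly
-- across the three replicas.
CREATE TABLE local_table_2 (CounterID UInt32, UserID UInt64)
ENGINE = MergeTree
ORDER BY (CounterID, intHash32(UserID))
SAMPLE BY intHash32(UserID);
```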
From 8a2451553702df162627352173a96b01f23c631d Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Fri, 17 May 2024 14:23:32 +0200
Subject: [PATCH 345/651] fix black
---
 .../test_host_resolver_fail_count/test_case.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/tests/integration/test_host_resolver_fail_count/test_case.py b/tests/integration/test_host_resolver_fail_count/test_case.py
index fa23ecf3e74..d25681f0781 100644
--- a/tests/integration/test_host_resolver_fail_count/test_case.py
+++ b/tests/integration/test_host_resolver_fail_count/test_case.py
@@ -1,4 +1,5 @@
"""Test Interserver responses on configured IP."""
+
import pytest
import time
from helpers.cluster import ClickHouseCluster
@@ -23,7 +24,7 @@ def start_cluster():
# The same value as in ClickHouse, this can't be configured via config now
-DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2*60
+DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2 * 60
def test_host_resolver(start_cluster):
@@ -36,7 +37,7 @@ def test_host_resolver(start_cluster):
(node.ip_address, "minio1"), # no answer on 9001 port on this IP
]
)
-
+
node.query("SYSTEM DROP DNS CACHE")
node.query("SYSTEM DROP CONNECTIONS CACHE")
@@ -94,7 +95,9 @@ def test_host_resolver(start_cluster):
INSERT INTO test VALUES (101,{k})
"""
)
- intermediate_fails = node.query("SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'")
+ intermediate_fails = node.query(
+ "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'"
+ )
k += 1
if k >= limit:
# Dead IP was not chosen for 100 iterations.
From 30e1802c2718ea89b22b716a71f9d9d86b068694 Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Fri, 17 May 2024 14:28:12 +0200
Subject: [PATCH 346/651] Fix ugly formatting
---
 docs/en/sql-reference/operators/in.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md
index 74eebb2538c..0257d21b30f 100644
--- a/docs/en/sql-reference/operators/in.md
+++ b/docs/en/sql-reference/operators/in.md
@@ -129,7 +129,7 @@ Remember that the algorithms described below may work differently depending on t
When using the regular `IN`, the query is sent to remote servers, and each of them runs the subqueries in the `IN` or `JOIN` clause.
-When using `GLOBAL IN` / `GLOBAL JOINs`, first all the subqueries are run for `GLOBAL IN` / `GLOBAL JOINs`, and the results are collected in temporary tables. Then the temporary tables are sent to each remote server, where the queries are run using this temporary data.
+When using `GLOBAL IN` / `GLOBAL JOIN`, first all the subqueries are run for `GLOBAL IN` / `GLOBAL JOIN`, and the results are collected in temporary tables. Then the temporary tables are sent to each remote server, where the queries are run using this temporary data.
For a non-distributed query, use the regular `IN` / `JOIN`.
From 803a52ba944fb00b8a3a266317f50de063dd534f Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Shiryaev" Date: Fri, 17 May 2024 14:45:21 +0200 Subject: [PATCH 347/651] Do not create new release in release branch automatically --- tests/ci/release.py | 9 +++++---- tests/ci/version_helper.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/ci/release.py b/tests/ci/release.py index 2775d31285e..2ae82177c67 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -406,7 +406,7 @@ class Release: def _bump_release_branch(self): # Update only git, original version stays the same self._git.update() - new_version = self.version.patch_update() + new_version = self.version.copy() version_type = self.get_stable_release_type() pr_labels = f"--label {Labels.RELEASE}" if version_type == VersionType.LTS: @@ -432,9 +432,10 @@ class Release: "changes with it.'", dry_run=self.dry_run, ) - with self._create_gh_release(False): - # Here the release branch part is done - yield + # Here the release branch part is done. + # We don't create a release itself automatically to have a + # safe window to backport possible bug fixes. + yield @contextmanager def _bump_version_in_master(self, helper_branch: str) -> Iterator[None]: diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index f649732171f..800bfcf52c3 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -165,6 +165,21 @@ class ClickHouseVersion: self._description = version_type self._describe = f"v{self.string}-{version_type}" + def copy(self) -> "ClickHouseVersion": + copy = ClickHouseVersion( + self.major, + self.minor, + self.patch, + self.revision, + self._git, + str(self.tweak), + ) + try: + copy.with_description(self.description) + except ValueError: + pass + return copy + def __eq__(self, other: Any) -> bool: if not isinstance(self, type(other)): return NotImplemented From 226de66d3603e65aa10e6b687642cd6a89edeb7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 17 May 2024 13:33:14 +0000 Subject: [PATCH 348/651] Add missing settings to recoverLostReplica --- src/Databases/DatabaseReplicated.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 7b1c49c2cf8..78d502ec2c7 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -944,6 +944,13 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep query_context->setSetting("allow_hyperscan", 1); query_context->setSetting("allow_simdjson", 1); query_context->setSetting("allow_deprecated_syntax_for_merge_tree", 1); + query_context->setSetting("allow_suspicious_primary_key", 1); + query_context->setSetting("allow_suspicious_ttl_expressions", 1); + query_context->setSetting("allow_suspicious_variant_types", 1); + query_context->setSetting("enable_deflate_qpl_codec", 1); + query_context->setSetting("enable_zstd_qat_codec", 1); + query_context->setSetting("allow_create_index_without_type", 1); + query_context->setSetting("allow_experimental_s3queue", 1); auto txn = std::make_shared(current_zookeeper, zookeeper_path, false, ""); query_context->initZooKeeperMetadataTransaction(txn); From 85aa8b71a4094b592099f9913563fe573fc05f73 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 May 2024 15:57:12 +0200 Subject: [PATCH 349/651] Fix searching for libclang_rt.builtins.*.a on FreeBSD Signed-off-by: Azat Khuzhin --- cmake/freebsd/default_libs.cmake | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git 
diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake
index 1eeb1a872bd..6bde75f8c9a 100644
--- a/cmake/freebsd/default_libs.cmake
+++ b/cmake/freebsd/default_libs.cmake
@@ -1,11 +1,23 @@
set (DEFAULT_LIBS "-nodefaultlibs")
if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64")
- execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-x86_64.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+ set(system_processor "x86_64")
else ()
- execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+ set(system_processor "${CMAKE_SYSTEM_PROCESSOR}")
endif ()
+file(GLOB bprefix "/usr/local/llvm${COMPILER_VERSION_MAJOR}/lib/clang/${COMPILER_VERSION_MAJOR}/lib/${system_processor}-portbld-freebsd*/")
+message(STATUS "-Bprefix: ${bprefix}")
+
+execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+# --print-file-name simply prints back what you passed in case nothing was resolved, so let's try one other possible option
+if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins-${system_processor}.a")
+ execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
+if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins.a")
+ message(FATAL_ERROR "libclang_rt.builtins has not been found")
+endif()
+
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread")
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
From e560bd8a1a9c57640af1303a95f0a81d864c75e3 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Fri, 17 May 2024 14:37:47 +0000
Subject: [PATCH 350/651] Incorporate review feedback
---
 src/Access/AccessRights.cpp | 1 +
 src/AggregateFunctions/AggregateFunctionSum.h | 12 ++++++------
 src/Compression/CompressionCodecDoubleDelta.cpp | 4 ++--
 src/Coordination/KeeperReconfiguration.cpp | 4 ++--
 src/Coordination/KeeperServer.cpp | 2 +-
 src/Core/Field.cpp | 1 +
 src/Functions/FunctionsTimeWindow.cpp | 2 --
 src/Parsers/Lexer.cpp | 2 ++
 8 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp
index dd25d3e4ac0..2127f4ada70 100644
--- a/src/Access/AccessRights.cpp
+++ b/src/Access/AccessRights.cpp
@@ -258,6 +258,7 @@ namespace
case TABLE_LEVEL: return AccessFlags::allFlagsGrantableOnTableLevel();
case COLUMN_LEVEL: return AccessFlags::allFlagsGrantableOnColumnLevel();
}
+ chassert(false);
}
}
diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h
index 2f23187d2ea..2ce03c530c2 100644
--- a/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/src/AggregateFunctions/AggregateFunctionSum.h
@@ -457,12 +457,12 @@ public:
String getName() const override
{
- switch (Type)
- {
- case AggregateFunctionTypeSum: return "sum";
- case AggregateFunctionTypeSumWithOverflow: return "sumWithOverflow";
- case AggregateFunctionTypeSumKahan: return "sumKahan";
- }
+ if constexpr (Type == AggregateFunctionTypeSum)
+ return "sum";
+ else if constexpr (Type == AggregateFunctionTypeSumWithOverflow)
+ return "sumWithOverflow";
+ else if constexpr (Type == AggregateFunctionTypeSumKahan)
+ return "sumKahan";
}
explicit AggregateFunctionSum(const DataTypes &
argument_types_) diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 78fdf5c627a..443b9d33532 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -142,9 +142,9 @@ namespace ErrorCodes { extern const int CANNOT_COMPRESS; extern const int CANNOT_DECOMPRESS; - extern const int BAD_ARGUMENTS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; + extern const int LOGICAL_ERROR; } namespace @@ -163,7 +163,7 @@ inline Int64 getMaxValueForByteSize(Int8 byte_size) case sizeof(UInt64): return std::numeric_limits::max(); default: - throw Exception(ErrorCodes::BAD_ARGUMENTS, "only 1, 2, 4 and 8 data sizes are supported"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "only 1, 2, 4 and 8 data sizes are supported"); } } diff --git a/src/Coordination/KeeperReconfiguration.cpp b/src/Coordination/KeeperReconfiguration.cpp index a2a06f92283..05211af6704 100644 --- a/src/Coordination/KeeperReconfiguration.cpp +++ b/src/Coordination/KeeperReconfiguration.cpp @@ -8,7 +8,7 @@ namespace DB namespace ErrorCodes { - extern const int UNEXPECTED_ZOOKEEPER_ERROR; + extern const int LOGICAL_ERROR; } ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining) @@ -85,7 +85,7 @@ String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateA new_config.emplace_back(RaftServerConfig{*cfg->get_server(priority->id)}); } else - throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected update"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected update"); } for (const auto & item : cfg->get_servers()) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b132c898be6..953072c5b0e 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -991,7 +991,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return Accepted; } - throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected action"); + chassert(false); } ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 7207485c799..73f0703f21e 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -146,6 +146,7 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) case Field::Types::CustomType: return Field(); } + UNREACHABLE(); } void readBinary(Array & x, ReadBuffer & buf) diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 1c9f28c9724..f93a885ee65 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -232,7 +232,6 @@ struct TimeWindowImpl default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } - UNREACHABLE(); } template @@ -422,7 +421,6 @@ struct TimeWindowImpl default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } - UNREACHABLE(); } template diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 30717550713..d669c8a4690 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -41,6 +41,8 @@ Token quotedString(const char *& pos, const char * const token_begin, const char ++pos; continue; } + + chassert(false); } } From 
f266bdb88e1891e484add0431e9e5ca56c963635 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 17 May 2024 14:44:17 +0000 Subject: [PATCH 351/651] Fix more places --- src/Functions/FunctionsRound.h | 4 ---- src/Interpreters/HashJoin.h | 6 ------ .../MergeTree/PartMovesBetweenShardsOrchestrator.cpp | 2 -- src/Storages/WindowView/StorageWindowView.cpp | 2 -- 4 files changed, 14 deletions(-) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 233d4058f11..dde57e8320d 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -243,8 +243,6 @@ inline float roundWithMode(float x, RoundingMode mode) case RoundingMode::Ceil: return ceilf(x); case RoundingMode::Trunc: return truncf(x); } - - UNREACHABLE(); } inline double roundWithMode(double x, RoundingMode mode) @@ -256,8 +254,6 @@ inline double roundWithMode(double x, RoundingMode mode) case RoundingMode::Ceil: return ceil(x); case RoundingMode::Trunc: return trunc(x); } - - UNREACHABLE(); } template diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 86db8943926..a0996556f9a 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -322,8 +322,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t getTotalByteCountImpl(Type which) const @@ -338,8 +336,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t getBufferSizeInCells(Type which) const @@ -354,8 +350,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } /// NOLINTEND(bugprone-macro-parentheses) }; diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 78fcfabb704..4228d7b70b6 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -616,8 +616,6 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st } } } - - UNREACHABLE(); } void PartMovesBetweenShardsOrchestrator::removePins(const Entry & entry, zkutil::ZooKeeperPtr zk) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 4e11787cecf..8bca1c97aad 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -919,7 +919,6 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) @@ -947,7 +946,6 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } void StorageWindowView::addFireSignal(std::set & signals) From d964b4b78667a1437dd74836432828c5dda1be7e Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 17 May 2024 16:50:38 +0200 Subject: [PATCH 352/651] Finish archives related changes --- src/Disks/ObjectStorages/IObjectStorage.h | 6 +++ .../ObjectStorages/S3/S3ObjectStorage.cpp | 11 ++++- .../ObjectStorage/ReadBufferIterator.cpp | 40 ++++++++++++------- .../ObjectStorage/StorageObjectStorage.cpp | 7 +++- .../StorageObjectStorageCluster.cpp | 2 +- .../StorageObjectStorageSource.cpp | 37 +++++++++-------- .../StorageObjectStorageSource.h | 19 ++++++++- src/Storages/S3Queue/S3QueueSource.h | 2 +- 8 files changed, 88 insertions(+), 36 deletions(-) diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h 
index 43c7cf19adf..5724ae8929c 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -37,6 +37,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } class ReadBufferFromFileBase; @@ -64,6 +65,11 @@ struct RelativePathWithMetadata {} virtual ~RelativePathWithMetadata() = default; + + virtual std::string getFileName() const { return std::filesystem::path(relative_path).filename(); } + virtual std::string getPath() const { return relative_path; } + virtual bool isArchive() const { return false; } + virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index c24874d0a94..983bb1834b8 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -457,7 +457,16 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true); + S3::ObjectInfo object_info; + try + { + object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true); + } + catch (DB::Exception & e) + { + e.addMessage("while reading " + path); + throw; + } ObjectMetadata result; result.size_bytes = object_info.size; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 61575b0115a..e065de16e55 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -50,7 +50,7 @@ SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const std::back_inserter(sources), [&](const auto & elem) { - return std::filesystem::path(configuration->getDataSourceDescription()) / elem->relative_path; + return std::filesystem::path(configuration->getDataSourceDescription()) / elem->getPath(); }); return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); } @@ -67,8 +67,9 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( const auto & object_info = (*it); auto get_last_mod_time = [&] -> std::optional { + const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath(); if (!object_info->metadata) - object_info->metadata = object_storage->tryGetObjectMetadata(object_info->relative_path); + object_info->metadata = object_storage->tryGetObjectMetadata(path); return object_info->metadata ? std::optional(object_info->metadata->last_modified.epochTime()) @@ -77,7 +78,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( if (format) { - auto cache_key = getKeyForSchemaCache(object_info->relative_path, *format); + auto cache_key = getKeyForSchemaCache(object_info->getPath(), *format); if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) return columns; } @@ -88,7 +89,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( /// If we have such entry for some format, we can use this format to read the file. 
for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) { - auto cache_key = getKeyForSchemaCache(object_info->relative_path, format_name); + auto cache_key = getKeyForSchemaCache(object_info->getPath(), format_name); if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) { /// Now format is known. It should be the same for all files. @@ -105,7 +106,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) { if (query_settings.schema_inference_use_cache) - schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path, *format), num_rows); + schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->getPath(), *format), num_rows); } void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) @@ -113,7 +114,7 @@ void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) { - schema_cache.addColumns(getKeyForSchemaCache(current_object_info->relative_path, *format), columns); + schema_cache.addColumns(getKeyForSchemaCache(current_object_info->getPath(), *format), columns); } } @@ -134,7 +135,7 @@ void ReadBufferIterator::setFormatName(const String & format_name) String ReadBufferIterator::getLastFileName() const { if (current_object_info) - return current_object_info->relative_path; + return current_object_info->getFileName(); else return ""; } @@ -142,9 +143,13 @@ String ReadBufferIterator::getLastFileName() const std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() { auto context = getContext(); - auto impl = object_storage->readObject(StoredObject(current_object_info->relative_path), context->getReadSettings()); - const auto compression_method = chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method); + + const auto & path = current_object_info->isArchive() ? 
current_object_info->getPathToArchive() : current_object_info->getPath();
+ auto impl = object_storage->readObject(StoredObject(path), context->getReadSettings());
+
+ const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method);
const auto zstd_window_log_max = static_cast<int>(context->getSettingsRef().zstd_window_log_max);
+
return wrapReadBufferWithCompressionMethod(std::move(impl), compression_method, zstd_window_log_max);
}
@@ -158,7 +163,7 @@ ReadBufferIterator::Data ReadBufferIterator::next()
{
for (const auto & object_info : read_keys)
{
- if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->relative_path))
+ if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName()))
{
format = format_from_file_name;
break;
@@ -170,7 +175,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT)
{
if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end()))
+ {
return {nullptr, cached_columns, format};
+ }
}
}
@@ -178,7 +185,7 @@ ReadBufferIterator::Data ReadBufferIterator::next()
{
current_object_info = file_iterator->next(0);
- if (!current_object_info || current_object_info->relative_path.empty())
+ if (!current_object_info)
{
if (first)
{
@@ -203,6 +210,9 @@
return {nullptr, std::nullopt, format};
}
+ const auto filename = current_object_info->getFileName();
+ chassert(!filename.empty());
+
/// file iterator could get new keys after new iteration
if (read_keys.size() > prev_read_keys_size)
{
@@ -211,7 +221,7 @@
for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it)
{
- if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->relative_path))
+ if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName()))
{
format = format_from_file_name;
break;
@@ -250,15 +260,15 @@
using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive;
if (auto object_info_in_archive = dynamic_cast<const ObjectInfoInArchive *>(current_object_info.get()))
{
- compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method);
+ compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method);
auto & archive_reader = object_info_in_archive->archive_reader;
read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true);
}
else
{
- compression_method = chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method);
+ compression_method = chooseCompressionMethod(filename, configuration->compression_method);
read_buf = object_storage->readObject(
StoredObject(current_object_info->getPath()),
getContext()->getReadSettings(),
{},
current_object_info->metadata->size_bytes);
diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
index 73e3d861cff..c45752c10f5 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
@@ -403,7 +403,12 @@ void
StorageObjectStorage::Configuration::initialize( configuration.fromAST(engine_args, local_context, with_table_structure); if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); + { + configuration.format = FormatFactory::instance().tryGetFormatFromFileName( + configuration.isArchive() + ? configuration.getPathInArchive() + : configuration.getPath()).value_or("auto"); + } else FormatFactory::instance().checkFormatName(configuration.format); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index a43d9da0fa3..78f568d8ae2 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -90,7 +90,7 @@ RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExten { auto object_info = iterator->next(0); if (object_info) - return object_info->relative_path; + return object_info->getPath(); else return ""; }); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 56905e6c29b..d3b67876224 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -100,6 +100,7 @@ std::shared_ptr StorageObjectStorageSourc "Expression can not have wildcards inside {} name", configuration->getNamespaceType()); auto settings = configuration->getQuerySettings(local_context); + const bool is_archive = configuration->isArchive(); std::unique_ptr iterator; if (configuration->isPathWithGlobs()) @@ -107,7 +108,7 @@ std::shared_ptr StorageObjectStorageSourc /// Iterate through disclosed globs and make a source for each file iterator = std::make_unique( object_storage, configuration, predicate, virtual_columns, - local_context, read_keys, settings.list_object_keys_size, + local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size, settings.throw_on_zero_files_match, file_progress_callback); } else @@ -126,11 +127,11 @@ std::shared_ptr StorageObjectStorageSourc } iterator = std::make_unique( - object_storage, copy_configuration, virtual_columns, read_keys, + object_storage, copy_configuration, virtual_columns, is_archive ? nullptr : read_keys, settings.ignore_non_existent_file, file_progress_callback); } - if (configuration->isArchive()) + if (is_archive) { return std::make_shared(object_storage, configuration, std::move(iterator), local_context, read_keys); } @@ -175,12 +176,13 @@ Chunk StorageObjectStorageSource::generate() progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); const auto & object_info = reader.getObjectInfo(); + const auto & filename = object_info.getFileName(); chassert(object_info.metadata); VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, fs::path(configuration->getNamespace()) / reader.getRelativePath(), - object_info.metadata->size_bytes); + object_info.metadata->size_bytes, &filename); return chunk; } @@ -219,7 +221,7 @@ void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t n std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / object_info->relative_path, + fs::path(configuration->getDataSourceDescription()) / object_info->getPath(), configuration->format, format_settings, getContext()); @@ -242,11 +244,14 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade { object_info = file_iterator->next(processor); - if (!object_info || object_info->relative_path.empty()) + if (!object_info || object_info->getFileName().empty()) return {}; if (!object_info->metadata) - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + { + const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath(); + object_info->metadata = object_storage->getObjectMetadata(path); + } } while (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0); @@ -282,7 +287,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade } else { - compression_method = chooseCompressionMethod(object_info->relative_path, configuration->compression_method); + compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); read_buf = createReadBuffer(*object_info); } @@ -355,7 +360,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const O LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); auto async_reader = object_storage->readObjects( - StoredObjects{StoredObject{object_info.relative_path, /* local_path */ "", object_size}}, read_settings); + StoredObjects{StoredObject{object_info.getPath(), /* local_path */ "", object_size}}, read_settings); async_reader->setReadUntilEnd(); if (read_settings.remote_fs_prefetch) @@ -366,7 +371,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const O else { /// FIXME: this is inconsistent that readObject always reads synchronously ignoring read_method setting. 
- return object_storage->readObject(StoredObject(object_info.relative_path, "", object_size), read_settings); + return object_storage->readObject(StoredObject(object_info.getPath(), "", object_size), read_settings); } } @@ -381,7 +386,7 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::IIterator::next( if (object_info) { - LOG_TEST(logger, "Next key: {}", object_info->relative_path); + LOG_TEST(logger, "Next key: {}", object_info->getFileName()); } return object_info; @@ -470,7 +475,7 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne new_batch = std::move(result.value()); for (auto it = new_batch.begin(); it != new_batch.end();) { - if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) + if (!recursive && !re2::RE2::FullMatch((*it)->getPath(), *matcher)) it = new_batch.erase(it); else ++it; @@ -487,7 +492,7 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne for (const auto & object_info : new_batch) { chassert(object_info); - paths.push_back(fs::path(configuration->getNamespace()) / object_info->relative_path); + paths.push_back(fs::path(configuration->getNamespace()) / object_info->getPath()); } VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); @@ -675,10 +680,10 @@ StorageObjectStorageSource::ArchiveIterator::createArchiveReader(ObjectInfoPtr o { const auto size = object_info->metadata->size_bytes; return DB::createArchiveReader( - /* path_to_archive */object_info->relative_path, + /* path_to_archive */object_info->getPath(), /* archive_read_function */[=, this]() { - StoredObject stored_object(object_info->relative_path, "", size); + StoredObject stored_object(object_info->getPath(), "", size); return object_storage->readObject(stored_object, getContext()->getReadSettings()); }, /* archive_size */size); @@ -720,7 +725,7 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) return {}; if (!archive_object->metadata) - archive_object->metadata = object_storage->getObjectMetadata(archive_object->relative_path); + archive_object->metadata = object_storage->getObjectMetadata(archive_object->getPath()); archive_reader = createArchiveReader(archive_object); if (!archive_reader->fileExists(path_in_archive)) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 664aad56928..fb0ad3e32f1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -92,7 +92,7 @@ protected: PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getRelativePath() const { return object_info->relative_path; } + std::string getRelativePath() const { return object_info->getPath(); } const ObjectInfo & getObjectInfo() const { return *object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } @@ -251,6 +251,23 @@ public: const std::string & path_in_archive_, std::shared_ptr archive_reader_); + std::string getFileName() const override + { + return path_in_archive; + } + + std::string getPath() const override + { + return archive_object->getPath() + "::" + path_in_archive; + } + + std::string getPathToArchive() const override + { + return archive_object->getPath(); + } + + bool isArchive() const override { return true; } + const ObjectInfoPtr archive_object; const 
std::string path_in_archive; const std::shared_ptr archive_reader; diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index fdeed8d46d2..663577e055b 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -29,7 +29,7 @@ public: using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr; using ReaderHolder = StorageObjectStorageSource::ReaderHolder; using Metadata = S3QueueFilesMetadata; - using ObjectInfo = RelativePathWithMetadata; + using ObjectInfo = StorageObjectStorageSource::ObjectInfo; using ObjectInfoPtr = std::shared_ptr; using ObjectInfos = std::vector; From a2c040111c67b45c685a091e0dd617dc875593c6 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Fri, 17 May 2024 17:45:03 +0200 Subject: [PATCH 353/651] Improve ReplacingMergeTree is_deleted documentation --- .../mergetree-family/replacingmergetree.md | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index a6258bcd581..8ec34dea58e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -45,7 +45,7 @@ When merging, `ReplacingMergeTree` from all the rows with the same sorting key l - The last in the selection, if `ver` not set. A selection is a set of rows in a set of parts participating in the merge. The most recently created part (the last insert) will be the last one in the selection. Thus, after deduplication, the very last row from the most recent insert will remain for each unique sorting key. - With the maximum version, if `ver` specified. If `ver` is the same for several rows, then it will use "if `ver` is not specified" rule for them, i.e. the most recent inserted row will remain. -Example: +Example: ```sql -- without ver - the last inserted 'wins' @@ -90,14 +90,14 @@ SELECT * FROM mySecondReplacingMT FINAL; ### is_deleted -`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted“ row, `0` is a “state“ row. +`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a "deleted" row, `0` is a "state" row. Column data type — `UInt8`. :::note `is_deleted` can only be enabled when `ver` is used. -The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used. +The row is deleted only when `OPTIMIZE ... FINAL CLEANUP`. This `CLEANUP` special keywork is not allowed by default unless `allow_experimental_replacing_merge_with_cleanup` MergeTree setting is enabled. No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. @@ -114,25 +114,31 @@ CREATE OR REPLACE TABLE myThirdReplacingMT `is_deleted` UInt8 ) ENGINE = ReplacingMergeTree(eventTime, is_deleted) -ORDER BY key; +ORDER BY key +SETTINGS allow_experimental_replacing_merge_with_cleanup = 1; INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0); -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); select * from myThirdReplacingMT final; 0 rows in set. Elapsed: 0.003 sec. 
--- delete rows with is_deleted -OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; +-- A simple optimize + final does not delete rows with is_deleted +OPTIMIZE TABLE myThirdReplacingMT FINAL; -INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); - -select * from myThirdReplacingMT final; +select * from myThirdReplacingMT; ┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ -│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ +│ 1 │ first │ 2020-01-01 01:01:01 │ 1 │ └─────┴─────────┴─────────────────────┴────────────┘ + +-- A cleanup optimize deletes rows with is_deleted +OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; + +select * from myThirdReplacingMT; + +0 rows in set. Elapsed: 0.002 sec. ``` ## Query clauses From acba6fd7a20ad44b29d373b5b44b26675444eaa3 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Fri, 17 May 2024 17:57:24 +0200 Subject: [PATCH 354/651] Fix typo --- .../table-engines/mergetree-family/replacingmergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 8ec34dea58e..58fa2829a64 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -97,7 +97,7 @@ SELECT * FROM mySecondReplacingMT FINAL; :::note `is_deleted` can only be enabled when `ver` is used. -The row is deleted only when `OPTIMIZE ... FINAL CLEANUP`. This `CLEANUP` special keywork is not allowed by default unless `allow_experimental_replacing_merge_with_cleanup` MergeTree setting is enabled. +The row is deleted only when `OPTIMIZE ... FINAL CLEANUP`. This `CLEANUP` special keyword is not allowed by default unless `allow_experimental_replacing_merge_with_cleanup` MergeTree setting is enabled. No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. From 16889ff0324bc607b8b376ee1abba7bb990b9b91 Mon Sep 17 00:00:00 2001 From: Jordi Villar Date: Fri, 17 May 2024 18:03:51 +0200 Subject: [PATCH 355/651] Rollback doc example --- .../mergetree-family/replacingmergetree.md | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 58fa2829a64..5a0a2691a9e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -124,21 +124,16 @@ select * from myThirdReplacingMT final; 0 rows in set. Elapsed: 0.003 sec. --- A simple optimize + final does not delete rows with is_deleted -OPTIMIZE TABLE myThirdReplacingMT FINAL; - -select * from myThirdReplacingMT; - -┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ -│ 1 │ first │ 2020-01-01 01:01:01 │ 1 │ -└─────┴─────────┴─────────────────────┴────────────┘ - --- A cleanup optimize deletes rows with is_deleted +-- delete rows with is_deleted OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; -select * from myThirdReplacingMT; +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); -0 rows in set. Elapsed: 0.002 sec. 
+select * from myThirdReplacingMT final; + +┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ +│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ +└─────┴─────────┴─────────────────────┴────────────┘ ``` ## Query clauses From 4909c3ea2393c66226c23cd03847f1c5e5b05ff7 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 17 May 2024 18:24:21 +0200 Subject: [PATCH 356/651] Cleanups --- src/Storages/MergeTree/IMergeTreeDataPart.h | 11 ------ .../MergeTree/IMergeTreeDataPartWriter.cpp | 7 ---- .../MergeTree/IMergeTreeDataPartWriter.h | 39 ++++++------------- .../MergeTree/IMergedBlockOutputStream.cpp | 8 +--- .../MergeTree/IMergedBlockOutputStream.h | 10 ++--- src/Storages/MergeTree/MergeTask.cpp | 2 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 29 +++++++------- .../MergeTree/MergeTreeDataPartCompact.h | 9 ----- .../MergeTree/MergeTreeDataPartWide.cpp | 15 ++++--- .../MergeTree/MergeTreeDataPartWide.h | 9 ----- .../MergeTreeDataPartWriterCompact.cpp | 18 ++++----- .../MergeTreeDataPartWriterCompact.h | 6 +-- .../MergeTreeDataPartWriterOnDisk.cpp | 4 +- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 13 ++----- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 29 ++++++-------- .../MergeTree/MergeTreeDataPartWriterWide.h | 6 +-- src/Storages/MergeTree/MergeTreeIOSettings.h | 2 +- src/Storages/MergeTree/MergeTreePartition.cpp | 5 +-- src/Storages/MergeTree/MergeTreePartition.h | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 1 + .../MergedColumnOnlyOutputStream.cpp | 9 ++--- src/Storages/MergeTree/MutateTask.cpp | 1 + 22 files changed, 76 insertions(+), 159 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 4ec5b3f5f8a..091a7ceb5bd 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -43,7 +43,6 @@ class IReservation; using ReservationPtr = std::unique_ptr; class IMergeTreeReader; -class IMergeTreeDataPartWriter; class MarkCache; class UncompressedCache; class MergeTreeTransaction; @@ -74,7 +73,6 @@ public: using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; -// using MergeTreeWriterPtr = std::unique_ptr; using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; @@ -106,15 +104,6 @@ public: const ValueSizeMap & avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; -//// virtual MergeTreeWriterPtr getWriter( -//// const NamesAndTypesList & columns_list, -//// const StorageMetadataPtr & metadata_snapshot, -//// const std::vector & indices_to_recalc, -//// const Statistics & stats_to_recalc_, -//// const CompressionCodecPtr & default_codec_, -//// const MergeTreeWriterSettings & writer_settings, -//// const MergeTreeIndexGranularity & computed_index_granularity) = 0; - // TODO: remove? 
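// (Illustrative aside: the commented-out getWriter() declaration removed above is
// superseded by the free createMergeTreeDataPartWriter() factory from this patch;
// a condensed sketch of that shape follows the writer hunks below.)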
virtual bool isStoredOnDisk() const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index b46fbc5fc9e..e01572715d6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,5 +1,4 @@ #include -#include "Storages/MergeTree/MergeTreeSettings.h" namespace DB { @@ -46,12 +45,10 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per } IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( -// const MergeTreeMutableDataPartPtr & data_part_, const String & data_part_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, @@ -61,7 +58,6 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( , serializations(serializations_) , data_part_storage(data_part_storage_) , index_granularity_info(index_granularity_info_) - , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) , columns_list(columns_list_) @@ -117,7 +113,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, @@ -134,7 +129,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, @@ -153,7 +147,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 6854668a01e..3245a23339b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -1,14 +1,12 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include "Storages/MergeTree/MergeTreeDataPartType.h" -#include "Storages/MergeTree/MergeTreeSettings.h" +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -24,15 +22,11 @@ class IMergeTreeDataPartWriter : private boost::noncopyable { public: IMergeTreeDataPartWriter( -// const MergeTreeMutableDataPartPtr & data_part_, - const String & data_part_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_, @@ -42,7 +36,7 @@ public: virtual void write(const Block & block, const IColumn::Permutation * permutation) = 0; - virtual 
void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) = 0; + virtual void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) = 0; virtual void finish(bool sync) = 0; @@ -56,21 +50,12 @@ protected: IDataPartStorage & getDataPartStorage() { return *data_part_storage; } - -// const MergeTreeMutableDataPartPtr data_part; // TODO: remove - /// Serializations for every columns and subcolumns by their names. - String data_part_name; - SerializationByName serializations; + const String data_part_name; + const SerializationByName serializations; MutableDataPartStoragePtr data_part_storage; - MergeTreeIndexGranularityInfo index_granularity_info; - - -// const MergeTreeData & storage; // TODO: remove - + const MergeTreeIndexGranularityInfo index_granularity_info; const MergeTreeSettingsPtr storage_settings; - - const StorageMetadataPtr metadata_snapshot; const NamesAndTypesList columns_list; const MergeTreeWriterSettings settings; @@ -90,7 +75,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, @@ -100,5 +84,4 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity); - } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index f99adf7c4db..89c813ab233 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -2,30 +2,26 @@ #include #include #include -#include "Storages/MergeTree/IDataPartStorage.h" -#include "Storages/StorageSet.h" namespace DB { IMergedBlockOutputStream::IMergedBlockOutputStream( -// const MergeTreeMutableDataPartPtr & data_part, const MergeTreeSettingsPtr & storage_settings_, MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_) - //: storage(data_part->storage) : storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) - , data_part_storage(data_part_storage_)//data_part->getDataPartStoragePtr()) + , data_part_storage(data_part_storage_) , reset_columns(reset_columns_) { if (reset_columns) { SerializationInfo::Settings info_settings = { - .ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization,//storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization, .choose_kind = false, }; diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index b6f279e6d58..a9b058418ea 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,12 +1,12 @@ #pragma once -#include "Storages/MergeTree/IDataPartStorage.h" -#include "Storages/MergeTree/MergeTreeSettings.h" +#include +#include #include #include #include #include -#include "Common/Logger.h" +#include namespace DB { @@ -15,7 +15,6 @@ class IMergedBlockOutputStream { public: IMergedBlockOutputStream( -// const MergeTreeMutableDataPartPtr & data_part, const 
MergeTreeSettingsPtr & storage_settings_, MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -43,11 +42,8 @@ protected: SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums); -// const MergeTreeData & storage; // TODO: remove -//// MergeTreeSettingsPtr storage_settings; LoggerPtr log; -//// StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 1b5ad0d81a7..2ce74bde1d5 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index eebbe3110c0..373ad6c23ea 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -48,21 +48,20 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( } MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( - const String & data_part_name_, - const String & logger_name_, - const SerializationByName & serializations_, - MutableDataPartStoragePtr data_part_storage_, - const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const String & marks_file_extension_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) { ////// TODO: fix the order of columns //// diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 5a57d778b7d..ca88edba7b3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -40,15 +40,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; -// MergeTreeWriterPtr getWriter( -// const NamesAndTypesList & columns_list, -// const StorageMetadataPtr & metadata_snapshot, -// const std::vector & indices_to_recalc, -// const Statistics & stats_to_recalc_, -// const CompressionCodecPtr & default_codec_, -// const MergeTreeWriterSettings & writer_settings, -// const MergeTreeIndexGranularity & computed_index_granularity) override; - // TODO: remove? 
bool isStoredOnDisk() const override { return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index c99cff258e0..34a3f30c4ba 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -54,18 +54,17 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( } MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( - const String & data_part_name_, - const String & logger_name_, - const SerializationByName & serializations_, - MutableDataPartStoragePtr data_part_storage_, - const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, - const String & marks_file_extension_, + const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 45d0fbbebec..e3cb3f04335 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -35,15 +35,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; -// MergeTreeWriterPtr getWriter( -// const NamesAndTypesList & columns_list, -// const StorageMetadataPtr & metadata_snapshot, -// const std::vector & indices_to_recalc, -// const Statistics & stats_to_recalc_, -// const CompressionCodecPtr & default_codec_, -// const MergeTreeWriterSettings & writer_settings, -// const MergeTreeIndexGranularity & computed_index_granularity) override; - // TODO: remove? 
bool isStoredOnDisk() const override { return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 6e8ea1a915b..3f08d8eea21 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -10,14 +10,12 @@ namespace ErrorCodes } MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( -// const MergeTreeMutableDataPartPtr & data_part_, - const String & data_part_name_, - const String & logger_name_, - const SerializationByName & serializations_, - MutableDataPartStoragePtr data_part_storage_, - const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -250,7 +248,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G } } -void MergeTreeDataPartWriterCompact::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterCompact::fillDataChecksums(MergeTreeDataPartChecksums & checksums) { if (columns_buffer.size() != 0) { @@ -420,7 +418,7 @@ size_t MergeTreeDataPartWriterCompact::ColumnsBuffer::size() const return accumulated_columns.at(0)->size(); } -void MergeTreeDataPartWriterCompact::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & /*checksums_to_remove*/) +void MergeTreeDataPartWriterCompact::fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & /*checksums_to_remove*/) { // If we don't have anything to write, skip finalization. if (!columns_list.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 3bec4c7e988..03804ff4966 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -11,14 +11,12 @@ class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterCompact( -// const MergeTreeMutableDataPartPtr & data_part, const String & data_part_name_, const String & logger_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, @@ -30,12 +28,12 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) override; + void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) override; void finish(bool sync) override; private: /// Finish serialization of the data. Flush rows in buffer to disk, compute checksums. 
- void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums); + void fillDataChecksums(MergeTreeDataPartChecksums & checksums); void finishDataSerialization(bool sync); void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 13892c17577..25eb83a82c0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -146,7 +146,6 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeIndices & indices_to_recalc_, @@ -231,7 +230,6 @@ static size_t computeIndexGranularityImpl( size_t MergeTreeDataPartWriterOnDisk::computeIndexGranularity(const Block & block) const { -// const auto storage_settings = storage.getSettings(); return computeIndexGranularityImpl( block, storage_settings->index_granularity_bytes, @@ -293,7 +291,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() GinIndexStorePtr store = nullptr; if (typeid_cast(&*skip_index) != nullptr) { - store = std::make_shared(stream_name, data_part_storage, data_part_storage, /*storage.getSettings()*/storage_settings->max_digestion_size_per_segment); + store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 39f33217b57..e17724fa1d0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -5,9 +5,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -97,21 +94,19 @@ public: void sync() const; - void addToChecksums(IMergeTreeDataPart::Checksums & checksums); + void addToChecksums(MergeTreeDataPartChecksums & checksums); }; using StreamPtr = std::unique_ptr>; using StatisticStreamPtr = std::unique_ptr>; MergeTreeDataPartWriterOnDisk( -// const MergeTreeMutableDataPartPtr & data_part_, const String & data_part_name_, const String & logger_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, @@ -140,13 +135,13 @@ protected: void calculateAndSerializeStatistics(const Block & stats_block); /// Finishes primary index serialization: write final primary index row (if required) and compute checksums - void fillPrimaryIndexChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillPrimaryIndexChecksums(MergeTreeDataPartChecksums & checksums); void finishPrimaryIndexSerialization(bool sync); /// Finishes skip indices serialization: write all accumulated data to disk and compute checksums - void fillSkipIndicesChecksums(MergeTreeData::DataPart::Checksums & checksums); + void 
fillSkipIndicesChecksums(MergeTreeDataPartChecksums & checksums); void finishSkipIndicesSerialization(bool sync); - void fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillStatisticsChecksums(MergeTreeDataPartChecksums & checksums); void finishStatisticsSerialization(bool sync); /// Get global number of the current which we are writing (or going to start to write) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 713dee87fa8..a57bf7d2037 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -76,14 +76,12 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, } MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( -// const MergeTreeMutableDataPartPtr & data_part_, - const String & data_part_name_, - const String & logger_name_, - const SerializationByName & serializations_, - MutableDataPartStoragePtr data_part_storage_, - const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -114,7 +112,6 @@ void MergeTreeDataPartWriterWide::addStreams( { assert(!substream_path.empty()); -// auto storage_settings = storage.getSettings(); auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); String stream_name; @@ -416,11 +413,10 @@ void MergeTreeDataPartWriterWide::writeColumn( serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } -// const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; for (const auto & granule : granules) { @@ -603,12 +599,11 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai } -void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) +void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { -// const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; - 
serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { @@ -683,7 +678,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync) } -void MergeTreeDataPartWriterWide::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) +void MergeTreeDataPartWriterWide::fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { // If we don't have anything to write, skip finalization. if (!columns_list.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index ef9c4ab17dc..5789213c910 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -21,14 +21,12 @@ class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterWide( -// const MergeTreeMutableDataPartPtr & data_part, const String & data_part_name_, const String & logger_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, @@ -40,14 +38,14 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) final; + void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) final; void finish(bool sync) final; private: /// Finish serialization of data: write final mark if required and compute checksums /// Also validate written data in debug mode - void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove); + void fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove); void finishDataSerialization(bool sync); /// Write data of one column. 
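Taken together, the writer hunks above drop the old per-part `getWriter()` virtual and route construction through free factory functions that receive only the narrow set of values a writer needs (part name, serializations, storage, granularity info, settings). A minimal standalone sketch of that dispatch shape, using placeholder types rather than the real ClickHouse classes:

```cpp
// Placeholder types only; a sketch of the dispatch pattern, not ClickHouse code.
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>

enum class PartType { Compact, Wide };

struct Writer { virtual ~Writer() = default; };
struct CompactWriter : Writer { explicit CompactWriter(std::string n) : name(std::move(n)) {} std::string name; };
struct WideWriter : Writer { explicit WideWriter(std::string n) : name(std::move(n)) {} std::string name; };

// The factory dispatches on the part type instead of asking the part object for a
// writer, so the writers no longer depend on the whole mutable data part.
std::unique_ptr<Writer> createWriter(PartType type, const std::string & part_name)
{
    switch (type)
    {
        case PartType::Compact: return std::make_unique<CompactWriter>(part_name);
        case PartType::Wide: return std::make_unique<WideWriter>(part_name);
    }
    throw std::logic_error("unknown part type");
}

int main()
{
    auto writer = createWriter(PartType::Wide, "all_1_1_0");
}
```

Decoupling writers from `IMergeTreeDataPart` also lets callers construct them in contexts that only have the storage and settings at hand, which is what the `MergedColumnOnlyOutputStream` hunk below relies on.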
diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 421c62887da..2b7d5c366f2 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -75,7 +75,7 @@ struct MergeTreeWriterSettings , query_write_settings(query_write_settings_) , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) - , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part) + , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) { } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index c2ef7f98388..c7b7557fe52 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -12,7 +12,6 @@ #include #include #include -#include "Interpreters/Context_fwd.h" #include #include @@ -414,12 +413,10 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file, {}); } -std::unique_ptr MergeTreePartition::store(/*const MergeTreeData & storage,*/ +std::unique_ptr MergeTreePartition::store( StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const { -// auto metadata_snapshot = storage.getInMemoryMetadataPtr(); -// const auto & context = storage.getContext(); const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage_context).sample_block; return store(partition_key_sample, data_part_storage, checksums, storage_context->getWriteSettings()); } diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 04175d6f927..44def70bdd9 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -44,7 +44,7 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. 
- [[nodiscard]] std::unique_ptr store(//const MergeTreeData & storage, + [[nodiscard]] std::unique_ptr store( StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 2441d941952..e0fb4f703a0 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 51853384012..1c75d81eca5 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -24,7 +24,6 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( , header(header_) { const auto & global_settings = data_part->storage.getContext()->getSettings(); -// const auto & storage_settings = data_part->storage.getSettings(); MergeTreeWriterSettings writer_settings( global_settings, @@ -34,10 +33,10 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( /* rewrite_primary_key = */ false); writer = createMergeTreeDataPartWriter( - data_part->getType(), - data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), - data_part_storage, data_part->index_granularity_info, - storage_settings, + data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, header.getNamesAndTypesList(), metadata_snapshot_, indices_to_recalc, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 54077055d96..7d6b68c7359 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include From d6a16b81ed2d1ab4099f2d786c22d127df223a02 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 17 May 2024 19:30:18 +0200 Subject: [PATCH 357/651] Files without shebang have mime 'text/plain' or 'inode/x-empty' --- tests/ci/style_check.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index dc5d974daf0..9906d87a8c0 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -11,6 +11,7 @@ from pathlib import Path from typing import List, Tuple, Union import magic + from docker_images_helper import get_docker_image, pull_image from env_helper import CI, REPO_COPY, TEMP_PATH from git_helper import GIT_PREFIX, git_runner @@ -121,12 +122,12 @@ def _check_mime(file: Union[Path, str], mime: str) -> bool: def is_python(file: Union[Path, str]) -> bool: """returns if the changed file in the repository is python script""" - return _check_mime(file, "text/x-script.python") + return _check_mime(file, "text/x-script.python") or str(file).endswith(".py") def is_shell(file: Union[Path, str]) -> bool: """returns if the changed file in the repository is shell script""" - return _check_mime(file, "text/x-shellscript") + return _check_mime(file, "text/x-shellscript") or str(file).endswith(".sh") def main(): From 13c94806e5f5ff800620d502229ff17cbce379f2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 17 May 2024 19:44:57 +0200 Subject: [PATCH 358/651] fix waiting for mutations with retriable errors --- src/Storages/StorageReplicatedMergeTree.cpp | 31 ++++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cc6599f8cd1..d60fa6bc787 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -590,6 +590,9 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( LOG_DEBUG(log, "Waiting for {} to apply mutation {}", replica, mutation_id); zkutil::EventPtr wait_event = std::make_shared(); + constexpr size_t MAX_RETRIES_ON_FAILED_MUTATION = 30; + size_t retries_on_failed_mutation = 0; + while (!partial_shutdown_called) { /// Mutation maybe killed or whole replica was deleted. @@ -637,18 +640,32 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( } } - /// If mutation status is empty, than local replica may just not loaded it into memory. - if (mutation_status && !mutation_status->latest_fail_reason.empty()) - { - LOG_DEBUG(log, "Mutation {} is done {} or failed {} (status: '{}')", mutation_id, mutation_status->is_done, !mutation_status->latest_fail_reason.empty(), mutation_status->latest_fail_reason); - break; - } - /// Replica can become inactive, so wait with timeout, if nothing happened -> recheck it if (!wait_event->tryWait(1000)) { LOG_TRACE(log, "Failed to wait for mutation '{}', will recheck", mutation_id); } + + /// If mutation status is empty, than local replica may just not loaded it into memory. 
+ if (mutation_status && !mutation_status->latest_fail_reason.empty()) + { + LOG_DEBUG(log, "Mutation {} is done {} or failed {} (status: '{}')", mutation_id, mutation_status->is_done, !mutation_status->latest_fail_reason.empty(), mutation_status->latest_fail_reason); + + /// In some cases latest_fail_reason may be retryable and there's a chance it will be cleared after the next attempt + if (++retries_on_failed_mutation <= MAX_RETRIES_ON_FAILED_MUTATION) + continue; + + if (mutation_status->is_done) + { + LOG_DEBUG(log, "Looks like mutation {} is done, rechecking", mutation_id); + continue; + } + + /// It's still possible that latest_fail_reason will be cleared just before queue.getIncompleteMutationsStatus(...) below, + /// but it's unlikely. Anyway, rethrow the exception here to avoid exiting with is_done=false + checkMutationStatus(mutation_status, {mutation_id}); + throw Exception(ErrorCodes::LOGICAL_ERROR, "checkMutationStatus didn't throw when checking status of {}: {}", mutation_id, mutation_status->latest_fail_reason); + } } /// This replica inactive, don't check anything From f8693017fb45a2aa63e7fd3878f55aac4d379b56 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 21:43:24 +0300 Subject: [PATCH 359/651] Revert "Update gui.md - Add ch-ui to open-source available tools." --- docs/en/interfaces/third-party/gui.md | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 0d888812cd5..bb326dc5242 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -222,17 +222,6 @@ Features: - Useful tools: Zookeeper data exploration, query EXPLAIN, kill queries, etc. - Visualization metric charts: queries and resource usage, number of merges/mutation, merge performance, query performance, etc. -### ch-ui {#ch-ui} - -[ch-ui](https://github.com/caioricciuti/ch-ui) is a simple React.js app interface for ClickHouse databases, designed for executing queries and visualizing data. Built with React and the ClickHouse client for web, it offers a sleek and user-friendly UI for easy database interactions. - -Features: - -- ClickHouse Integration: Easily manage connections and execute queries. -- Responsive Tab Management: Dynamically handle multiple tabs, such as query and table tabs. -- Performance Optimizations: Utilize IndexedDB for efficient caching and state management. -- Local Data Storage: All data is stored locally in the browser, ensuring no data is sent anywhere else. - ## Commercial {#commercial} ### DataGrip {#datagrip} From 71d33baf0e9d8fcf25d65f069e839664a9c01895 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 May 2024 21:45:44 +0300 Subject: [PATCH 360/651] Update gui.md --- docs/en/interfaces/third-party/gui.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index bb326dc5242..5b7615485ca 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -8,6 +8,17 @@ sidebar_label: Visual Interfaces ## Open-Source {#open-source} +### ch-ui {#ch-ui} + +[ch-ui](https://github.com/caioricciuti/ch-ui) is a simple React.js app interface for ClickHouse databases, designed for executing queries and visualizing data. Built with React and the ClickHouse client for web, it offers a sleek and user-friendly UI for easy database interactions. 
+ +Features: + +- ClickHouse Integration: Easily manage connections and execute queries. +- Responsive Tab Management: Dynamically handle multiple tabs, such as query and table tabs. +- Performance Optimizations: Utilizes Indexed DB for efficient caching and state management. +- Local Data Storage: All data is stored locally in the browser, ensuring no data is sent anywhere else. + ### Tabix {#tabix} Web interface for ClickHouse in the [Tabix](https://github.com/tabixio/tabix) project. From 077e6057f275a69a5fac48097b995572a5e07f06 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 17 May 2024 21:45:07 +0200 Subject: [PATCH 361/651] Update reinterpretAsDate and reinterpretAsDateTime functions, add a test --- .../functions/type-conversion-functions.md | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ea08ffa50e7..cf3483f27a4 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1004,9 +1004,91 @@ Result: ## reinterpretAsDate +Accepts a string, fixed string or numeric value and interprets the bytes as a number in host order (little endian). It returns a date from the interpreted number as the number of days since the beginning of the Unix Epoch. + +**Syntax** + +```sql +reinterpretAsDate(x) +``` + +**Parameters** + +- `x`: number of days since the beginning of the Unix Epoch. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Date. [Date](../data-types/date.md). + +**Implementation details** + +:::note +If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. +::: + +**Example** + +Query: + +```sql +SELECT reinterpretAsDate(65), reinterpretAsDate('A'); +``` + +Result: + +```response +┌─reinterpretAsDate(65)─┬─reinterpretAsDate('A')─┐ +│ 1970-03-07 │ 1970-03-07 │ +└───────────────────────┴────────────────────────┘ +``` + ## reinterpretAsDateTime -These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. +These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). Returns a date with time interpreted as the number of seconds since the beginning of the Unix Epoch. + +**Syntax** + +```sql +reinterpretAsDateTime(x) +``` + +**Parameters** + +- `x`: number of seconds since the beginning of the Unix Epoch. 
+ +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Date and Time. [DateTime](../data-types/datetime.md). + +**Implementation details** + +:::note +If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. +::: + +**Example** + +Query: + +```sql +SELECT reinterpretAsDateTime(65), reinterpretAsDateTime('A'); +``` + +Result: + +```response +┌─reinterpretAsDateTime(65)─┬─reinterpretAsDateTime('A')─┐ +│ 1970-01-01 01:01:05 │ 1970-01-01 01:01:05 │ +└───────────────────────────┴────────────────────────────┘ +``` ## reinterpretAsString From 764bf4d477c95cc3d27fe438a439956829997f9c Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 17 May 2024 22:04:40 +0200 Subject: [PATCH 362/651] Update reinterpretAsFixedString documentation and add tests --- .../functions/type-conversion-functions.md | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index cf3483f27a4..14a12ab5d5d 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1098,6 +1098,38 @@ This function accepts a number or date or date with time and returns a string co This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. +**Syntax** + +```sql +reinterpretAsFixedString(x) +``` + +**Parameters** + +- `x`: value to reinterpret to string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md). + +**Returned value** + +- Fixed string containing bytes representing `x`. [FixedString](../data-types/fixedstring.md). 
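All of the `reinterpret*` functions documented above follow one byte-level rule: copy the raw bytes in host order (little endian on typical x86_64 and AArch64 hosts), zero-pad inputs that are too short, and ignore extra bytes. A standalone C++ sketch of that rule, not the ClickHouse implementation:

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

int main()
{
    std::string s = "A";               // a single byte, 0x41 == 65
    s.resize(sizeof(uint16_t), '\0');  // short input is padded with null bytes
    uint16_t days = 0;                 // Date is stored as UInt16 days since the epoch
    std::memcpy(&days, s.data(), sizeof(days));
    std::cout << days << '\n';         // prints 65, which reinterpretAsDate maps to 1970-03-07
}
```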
+ +**Example** + +Query: + +```sql +SELECT + reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')), + reinterpretAsFixedString(toDate('1970-03-07')); +``` + +Result: + +```response +┌─reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsFixedString(toDate('1970-03-07'))─┐ +│ A │ A │ +└─────────────────────────────────────────────────────────────┴────────────────────────────────────────────────┘ +``` + ## reinterpretAsUUID :::note From 2c8b303a2fc69365be39a91179365466c3ebc14a Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 17 May 2024 20:16:58 +0000 Subject: [PATCH 363/651] Use Dynamic as supertype, add more tests, fix tests flakiness, update docs --- docs/en/sql-reference/data-types/dynamic.md | 4 ++-- src/DataTypes/getLeastSupertype.cpp | 19 +++++++++++++++++++ .../03037_dynamic_merges_1_horizontal.sh | 2 +- .../03037_dynamic_merges_1_vertical.sh | 2 +- .../03159_dynamic_type_all_types.reference | 12 ++++++------ .../03159_dynamic_type_all_types.sql | 4 ++-- .../03163_dynamic_as_supertype.reference | 10 ++++++++++ .../03163_dynamic_as_supertype.sql | 8 ++++++++ 8 files changed, 49 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/03163_dynamic_as_supertype.reference create mode 100644 tests/queries/0_stateless/03163_dynamic_as_supertype.sql diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index eabf032c52f..955fd54e641 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -14,7 +14,7 @@ To declare a column of `Dynamic` type, use the following syntax: Dynamic(max_types=N) ``` -Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic`. If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. +Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. :::note The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`. 
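The commit message says `Dynamic` now acts as the common supertype; the `getLeastSupertype` hunk below implements it: if any input type is `Dynamic`, the result is `Dynamic` with the largest `max_types` among the inputs. A standalone sketch of that rule with placeholder types, not the real `IDataType` hierarchy:

```cpp
#include <algorithm>
#include <cstddef>
#include <optional>
#include <vector>

struct TypeDesc
{
    bool is_dynamic = false;
    std::size_t max_dynamic_types = 0;
};

// Returns Dynamic(max_types = max over the Dynamic inputs) if any input is Dynamic,
// otherwise nullopt so the caller falls through to the ordinary supertype rules.
std::optional<TypeDesc> dynamicSupertype(const std::vector<TypeDesc> & types)
{
    bool have_dynamic = false;
    std::size_t max_types = 0;
    for (const auto & t : types)
    {
        if (t.is_dynamic)
        {
            have_dynamic = true;
            max_types = std::max(max_types, t.max_dynamic_types);
        }
    }
    if (!have_dynamic)
        return std::nullopt;
    return TypeDesc{true, max_types};
}
```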
@@ -355,7 +355,7 @@ SELECT * FROM test WHERE d2 == [1,2,3]::Array(UInt32)::Dynamic; - Compare `Dynamic` subcolumn with required type: ```sql -SELECT * FROM test WHERE d2.`Array(Int64)` == [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') +SELECT * FROM test WHERE d2.`Array(Int65)` == [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') ``` ```text diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 0977bea362c..a71b19d6c92 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB @@ -256,6 +257,24 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return types[0]; } + /// If one of the types is Dynamic, the supertype is Dynamic + { + bool have_dynamic = false; + size_t max_dynamic_types = 0; + + for (const auto & type : types) + { + if (const auto & dynamic_type = typeid_cast(type.get())) + { + have_dynamic = true; + max_dynamic_types = std::max(max_dynamic_types, dynamic_type->getMaxDynamicTypes()); + } + } + + if (have_dynamic) + return std::make_shared(max_dynamic_types); + } + /// Recursive rules /// If there are Nothing types, skip them diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh index 0d3cd45666a..7c1ac41cfdc 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh @@ -8,7 +8,7 @@ CLICKHOUSE_LOG_COMMENT= . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 " +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" function test() { diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh index b2c40668228..927ceac72b5 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh @@ -8,8 +8,8 @@ CLICKHOUSE_LOG_COMMENT= . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 " +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" function test() { echo "test" diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference index a162ec4f857..7dcaaa1f3ec 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -110,9 +110,9 @@ Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":null,"k1":1,"k2":2} -Object(Nullable(\'json\')) {"1":2,"2":3,"2020-10-10":null,"k1":null,"k2":null} -Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":"foo","k1":null,"k2":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} +Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string @@ -259,9 +259,9 @@ Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":null,"k1":1,"k2":2} -Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":"foo","k1":null,"k2":null} -Object(Nullable(\'json\')) {"1":2,"2":3,"2020-10-10":null,"k1":null,"k2":null} +Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql index 38d70dee64e..64fab07ed4f 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -86,13 +86,13 @@ INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' yea INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, ['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); -SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d ; +SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d, toString(d); CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; INSERT INTO t2 SELECT * FROM t; SELECT ''; -SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; +SELECT 
dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d, toString(d); SELECT ''; SELECT uniqExact(dynamicType(d)) t_ FROM t; diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.reference b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference new file mode 100644 index 00000000000..5f1a8613a77 --- /dev/null +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference @@ -0,0 +1,10 @@ +str_0 Dynamic(max_types=3) String +1 Dynamic(max_types=3) UInt64 +str_2 Dynamic(max_types=3) String +3 Dynamic(max_types=3) UInt64 +str_1 String +42 UInt64 +str_2 String +43 UInt64 +2020-01-01 Date +[1,2,3] Array(Int64) diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.sql b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql new file mode 100644 index 00000000000..fbb6aa74fab --- /dev/null +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql @@ -0,0 +1,8 @@ +SET allow_experimental_dynamic_type=1; +SELECT if(number % 2, number::Dynamic(max_types=3), ('str_' || toString(number))::Dynamic(max_types=2)) AS d, toTypeName(d), dynamicType(d) FROM numbers(4); +CREATE TABLE dynamic_test_1 (d Dynamic(max_types=3)) ENGINE = Memory; +INSERT INTO dynamic_test_1 VALUES ('str_1'), (42::UInt64); +CREATE TABLE dynamic_test_2 (d Dynamic(max_types=5)) ENGINE = Memory; +INSERT INTO dynamic_test_2 VALUES ('str_2'), (43::UInt64), ('2020-01-01'::Date), ([1, 2, 3]); +SELECT d, dynamicType(d) FROM dynamic_test_1 UNION ALL SELECT d, dynamicType(d) FROM dynamic_test_2; + From 24892b151a8f2012a5bfd9e3ae46ba379be133c7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 May 2024 22:52:25 +0200 Subject: [PATCH 364/651] Try to fix grpc for aarch64 crosscompilation (cherry picked from commit f3fbf532e0d4d7616f51a9c3d5087cf7b2e6d7d5) --- cmake/target.cmake | 5 +-- contrib/grpc-cmake/CMakeLists.txt | 56 +++++++++++++++++++++++++++++++ contrib/grpc-cmake/grpc.cmake | 4 +++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/cmake/target.cmake b/cmake/target.cmake index fb911ace7b5..d4a40602b21 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -41,10 +41,7 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_ICU OFF CACHE INTERNAL "") set (ENABLE_FASTOPS OFF CACHE INTERNAL "") elseif (OS_LINUX OR OS_ANDROID) - if (ARCH_AARCH64) - # FIXME: broken dependencies - set (ENABLE_GRPC OFF CACHE INTERNAL "") - elseif (ARCH_PPC64LE) + if (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") elseif (ARCH_RISCV64) # RISC-V support is preliminary diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index b8b5f5580c4..e7cf42cc87f 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -31,3 +31,59 @@ add_library(_ch_contrib_grpc INTERFACE) target_link_libraries(_ch_contrib_grpc INTERFACE ${gRPC_LIBRARIES}) target_include_directories(_ch_contrib_grpc SYSTEM INTERFACE ${gRPC_INCLUDE_DIRS}) add_library(ch_contrib::grpc ALIAS _ch_contrib_grpc) + +# Here we are trying to build a binary tool grpc_cpp_plugin in case of cross-compilation. +# We need this file only during compilation process itself so we need it for our "host" +# platform, not "target" platform. +# If we are doing normal compilation this file will be produced in grpc.cmake. +# +# All code inside this block looks so weird because cmake fundametally doesn't +# support different toolchains for different targets. So we just running it +# in "bash script" fashion with different (actually without, i.e. default) toolchain. 
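# (Illustrative aside: the execute_process() calls below configure and build a second,
# host-native CMake tree whose only product is the grpc_cpp_plugin binary, because the
# cross toolchain of the outer build cannot emit executables that run on the build host.)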
+# +# FIXME Sorry, I don't know cmake. +if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME + OR NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR) + + set (GRPC_CPP_PLUGIN_BUILD_DIR "${_gRPC_BINARY_DIR}/build") + + execute_process( + COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} + COMMAND_ECHO STDOUT + ) + + set(abseil_source_dir "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") + set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf") + set(re2_source_dir "${ClickHouse_SOURCE_DIR}/contrib/re2") + set(ssl_source_dir "${ClickHouse_SOURCE_DIR}/contrib/openssl") + set(zlib_source_dir "${ClickHouse_SOURCE_DIR}/contrib/zlib-ng") + # For some reason config exists only in this directory + set(zlib_config_source_dir "${ClickHouse_BINARY_DIR}/contrib/zlib-ng-cmake") + set(cares_source_dir "${ClickHouse_SOURCE_DIR}/contrib/c-ares") + execute_process( + COMMAND ${CMAKE_COMMAND} + "-G${CMAKE_GENERATOR}" + "-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DABSL_ROOT_DIR=${abseil_source_dir}" + "-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${zlib_config_source_dir}" + "-DgRPC_INSTALL=0" + "-DABSL_ENABLE_INSTALL=1" + "-DPROTOBUF_ROOT_DIR=${protobuf_source_dir}" + "-DRE2_ROOT_DIR=${re2_source_dir}" + "-DCARES_ROOT_DIR=${cares_source_dir}" + "-DBORINGSSL_ROOT_DIR=${ssl_source_dir}" + "-DZLIB_ROOT_DIR=${zlib_source_dir}" + "${_gRPC_SOURCE_DIR}" + WORKING_DIRECTORY "${GRPC_CPP_PLUGIN_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + execute_process( + COMMAND ${CMAKE_COMMAND} --build "${GRPC_CPP_PLUGIN_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + add_executable(grpc_cpp_plugin IMPORTED GLOBAL) + set_target_properties (grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") + add_dependencies(grpc_cpp_plugin "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") +endif() diff --git a/contrib/grpc-cmake/grpc.cmake b/contrib/grpc-cmake/grpc.cmake index c2488539211..39645938f14 100644 --- a/contrib/grpc-cmake/grpc.cmake +++ b/contrib/grpc-cmake/grpc.cmake @@ -1829,6 +1829,8 @@ target_link_libraries(grpc_plugin_support ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} ) +if (CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME + AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR) add_executable(grpc_cpp_plugin ${_gRPC_SOURCE_DIR}/src/compiler/cpp_plugin.cc @@ -1852,3 +1854,5 @@ target_link_libraries(grpc_cpp_plugin ${_gRPC_ALLTARGETS_LIBRARIES} grpc_plugin_support ) + +endif() From dd6c763492d032738c922cff19c8687e05c2f542 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 17 May 2024 17:48:06 -0400 Subject: [PATCH 365/651] Use of the redefined context in process query pipline. 
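
Without this change, process() executed every view in a pushing-to-views
chain under the shared views_data.context, so the SQL SECURITY / DEFINER
grants of an individual view were not applied while pushing into it. The
fix keeps a per-view context and uses it instead. A simplified sketch of
the shape of the change (abridged, see the diff below for the real code):

    struct ViewRuntimeData
    {
        /// ...
        /// An overridden context bound to this view with the correct
        /// SQL security grants.
        ContextPtr context;
    };

    static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data)
    {
        const auto & context = view.context; /// was: views_data.context
        /// ...
    }
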
--- .../Transforms/buildPushingToViewsChain.cpp | 5 +-- .../Transforms/buildPushingToViewsChain.h | 3 ++ ...te_view_with_sql_security_option.reference | 1 + ...84_create_view_with_sql_security_option.sh | 35 +++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5e8ecdca95e..cdcfad4442c 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -414,7 +414,8 @@ std::optional generateViewChain( out.getInputHeader(), view_id, nullptr, - std::move(runtime_stats)}); + std::move(runtime_stats), + insert_context}); if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { @@ -590,7 +591,7 @@ Chain buildPushingToViewsChain( static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { - const auto & context = views_data.context; + const auto & context = view.context; /// We create a table with the same name as original table and the same alias columns, /// but it will contain single block (that is INSERT-ed into main table). diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 53aceeda1cc..a1feed91b60 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -33,6 +33,9 @@ struct ViewRuntimeData /// Info which is needed for query views log. std::unique_ptr runtime_stats; + /// An overridden context bounded to this view with the correct SQL security grants. + ContextPtr context; + void setException(std::exception_ptr e) { exception = e; diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 9ba927fa201..931cf8ac19c 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -24,6 +24,7 @@ OK 2 OK OK +100 ===== TestGrants ===== OK OK diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index 9c9df120298..62b03b5d5ff 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -192,6 +192,41 @@ ${CLICKHOUSE_CLIENT} --user $user1 --query " ${CLICKHOUSE_CLIENT} --query "GRANT SET DEFINER ON $user2 TO $user1" +${CLICKHOUSE_CLIENT} --multiquery < Date: Sat, 18 May 2024 00:29:22 +0200 Subject: [PATCH 366/651] harmonize h3PointDist* error messages --- src/Functions/h3PointDist.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/h3PointDist.cpp b/src/Functions/h3PointDist.cpp index 00b8fb0089e..889675a2dda 100644 --- a/src/Functions/h3PointDist.cpp +++ b/src/Functions/h3PointDist.cpp @@ -49,7 +49,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. 
Must be Float64", - arg->getName(), i, getName()); + arg->getName(), i + 1, getName()); } return std::make_shared(); } From c63753bab9f75ad52b4c8607d7b24e9f3edce7bc Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 18 May 2024 01:04:20 +0000 Subject: [PATCH 367/651] Fix tests --- .../0_stateless/03159_dynamic_type_all_types.reference | 6 +++--- .../0_stateless/03163_dynamic_as_supertype.reference | 10 +++++----- .../queries/0_stateless/03163_dynamic_as_supertype.sql | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference index 7dcaaa1f3ec..abecca893f9 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -110,9 +110,9 @@ Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string @@ -260,8 +260,8 @@ Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] Object(\'json\') {"1":"2"} Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.reference b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference index 5f1a8613a77..33e3a15c7fb 100644 --- a/tests/queries/0_stateless/03163_dynamic_as_supertype.reference +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference @@ -2,9 +2,9 @@ str_0 Dynamic(max_types=3) String 1 Dynamic(max_types=3) UInt64 str_2 Dynamic(max_types=3) String 3 Dynamic(max_types=3) UInt64 -str_1 String -42 UInt64 -str_2 String -43 UInt64 -2020-01-01 Date [1,2,3] Array(Int64) +2020-01-01 Date +str_1 String +str_2 String +42 UInt64 +43 UInt64 diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.sql b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql index fbb6aa74fab..baba637eea4 100644 --- a/tests/queries/0_stateless/03163_dynamic_as_supertype.sql +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql @@ -4,5 +4,5 @@ CREATE TABLE dynamic_test_1 (d Dynamic(max_types=3)) ENGINE = Memory; INSERT INTO dynamic_test_1 VALUES ('str_1'), (42::UInt64); CREATE TABLE dynamic_test_2 (d Dynamic(max_types=5)) ENGINE = Memory; INSERT INTO dynamic_test_2 VALUES ('str_2'), (43::UInt64), ('2020-01-01'::Date), ([1, 2, 
3]); -SELECT d, dynamicType(d) FROM dynamic_test_1 UNION ALL SELECT d, dynamicType(d) FROM dynamic_test_2; +SELECT * FROM (SELECT d, dynamicType(d) FROM dynamic_test_1 UNION ALL SELECT d, dynamicType(d) FROM dynamic_test_2) order by d; From 8562d8595a624149f69706c3833fd55a409b8aea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 May 2024 04:40:37 +0200 Subject: [PATCH 368/651] This log message is better in Trace --- src/Interpreters/Cache/Metadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index c832473c4cd..5ed4ccdbeca 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -846,7 +846,7 @@ LockedKey::~LockedKey() /// See comment near cleanupThreadFunc() for more details. key_metadata->key_state = KeyMetadata::KeyState::REMOVING; - LOG_DEBUG(key_metadata->logger(), "Submitting key {} for removal", getKey()); + LOG_TRACE(key_metadata->logger(), "Submitting key {} for removal", getKey()); key_metadata->addToCleanupQueue(); } From cafd074e0a888d722e5c6aacc9c363efd39f9648 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 May 2024 05:11:18 +0200 Subject: [PATCH 369/651] Prevent stack overflow in Fuzzer and Stress test --- docker/test/fuzzer/query-fuzzer-tweaks-users.xml | 5 +++++ docker/test/stateless/stress_tests.lib | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index ad261be1abe..e2a4976b385 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -36,6 +36,11 @@ + + + + + diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 6aaddbfe590..3b6ad244c82 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -154,6 +154,11 @@ EOL + + + + + From cde1b82ebdea4d2f768aacb65989228108715ed4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 May 2024 06:42:59 +0200 Subject: [PATCH 370/651] Print number tips in case of LowCardinality and Nullable --- src/Columns/ColumnNullable.cpp | 33 ++++++++++++++ src/Columns/ColumnNullable.h | 4 ++ .../Formats/Impl/PrettyBlockOutputFormat.cpp | 14 +++++- .../03156_nullable_number_tips.reference | 43 +++++++++++++++++++ .../03156_nullable_number_tips.sql | 24 +++++++++++ 5 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03156_nullable_number_tips.reference create mode 100644 tests/queries/0_stateless/03156_nullable_number_tips.sql diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index fa5fdfb8c21..30e62548ad6 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } @@ -116,6 +117,38 @@ void ColumnNullable::get(size_t n, Field & res) const getNestedColumn().get(n, res); } +Float64 ColumnNullable::getFloat64(size_t n) const +{ + if (isNullAt(n)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getFloat64", getName(), n); + else + return getNestedColumn().getFloat64(n); +} + +Float32 ColumnNullable::getFloat32(size_t n) const +{ + if (isNullAt(n)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL 
while calling method getFloat32", getName(), n); + else + return getNestedColumn().getFloat32(n); +} + +UInt64 ColumnNullable::getUInt(size_t n) const +{ + if (isNullAt(n)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getUInt", getName(), n); + else + return getNestedColumn().getUInt(n); +} + +Int64 ColumnNullable::getInt(size_t n) const +{ + if (isNullAt(n)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getInt", getName(), n); + else + return getNestedColumn().getInt(n); +} + void ColumnNullable::insertData(const char * pos, size_t length) { if (pos == nullptr) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index ef4bf4fa41b..c7ebb6ed7b6 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -57,6 +57,10 @@ public: void get(size_t n, Field & res) const override; bool getBool(size_t n) const override { return isNullAt(n) ? false : nested_column->getBool(n); } UInt64 get64(size_t n) const override { return nested_column->get64(n); } + Float64 getFloat64(size_t n) const override; + Float32 getFloat32(size_t n) const override; + UInt64 getUInt(size_t n) const override; + Int64 getInt(size_t n) const override; bool isDefaultAt(size_t n) const override { return isNullAt(n); } StringRef getDataAt(size_t) const override; /// Will insert null value if pos=nullptr diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 086b5bfada2..b1dbe68579f 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include namespace DB @@ -16,7 +18,14 @@ PrettyBlockOutputFormat::PrettyBlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, bool color_) : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()), color(color_), mono_block(mono_block_) { - readable_number_tip = header_.getColumns().size() == 1 && WhichDataType(header_.getDataTypes()[0]->getTypeId()).isNumber(); + /// Decide whether we should print a tip near the single number value in the result. + if (header_.getColumns().size() == 1) + { + /// Check if it is a numeric type, possible wrapped by Nullable or LowCardinality. + DataTypePtr type = removeNullable(recursiveRemoveLowCardinality(header_.getDataTypes().at(0))); + if (isNumber(type)) + readable_number_tip = true; + } } @@ -497,6 +506,9 @@ void PrettyBlockOutputFormat::writeReadableNumberTip(const Chunk & chunk) if (!is_single_number) return; + if (columns[0]->isNullAt(0)) + return; + auto value = columns[0]->getFloat64(0); auto threshold = format_settings.pretty.output_format_pretty_single_large_number_tip_threshold; diff --git a/tests/queries/0_stateless/03156_nullable_number_tips.reference b/tests/queries/0_stateless/03156_nullable_number_tips.reference new file mode 100644 index 00000000000..cb4e12684d8 --- /dev/null +++ b/tests/queries/0_stateless/03156_nullable_number_tips.reference @@ -0,0 +1,43 @@ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. 
│ 123456789 │ -- 123.46 million + └───────────┘ +Nullable(UInt64), Nullable(size = 10, UInt64(size = 10), UInt8(size = 10)) + ┏━━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━━┩ +1. │ 1111111101 │ -- 1.11 billion + └────────────┘ + ┏━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━┩ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + x + +1. ᴺᵁᴸᴸ +UInt64, Sparse(size = 10, UInt64(size = 6), UInt64(size = 5)) + ┏━━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━━┩ +1. │ 1111111101 │ -- 1.11 billion + └────────────┘ + ┏━━━┓ + ┃ x ┃ + ┡━━━┩ +1. │ 0 │ + └───┘ + x + +1. 0 diff --git a/tests/queries/0_stateless/03156_nullable_number_tips.sql b/tests/queries/0_stateless/03156_nullable_number_tips.sql new file mode 100644 index 00000000000..e6f2fa36d86 --- /dev/null +++ b/tests/queries/0_stateless/03156_nullable_number_tips.sql @@ -0,0 +1,24 @@ +SELECT 123456789 AS x FORMAT PrettyCompact; +SELECT toNullable(123456789) AS x FORMAT PrettyCompact; +SELECT toLowCardinality(toNullable(123456789)) AS x FORMAT PrettyCompact; +SELECT toNullable(toLowCardinality(123456789)) AS x FORMAT PrettyCompact; +SELECT toLowCardinality(123456789) AS x FORMAT PrettyCompact; + +CREATE TEMPORARY TABLE test (x Nullable(UInt64), PRIMARY KEY ()) ENGINE = MergeTree SETTINGS ratio_of_defaults_for_sparse_serialization = 0; +INSERT INTO test SELECT number % 2 ? number * 123456789 : NULL FROM numbers(10); + +SELECT DISTINCT dumpColumnStructure(*) FROM test; + +SELECT * FROM test ORDER BY ALL DESC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS FIRST LIMIT 1 FORMAT PrettySpace; + +DROP TEMPORARY TABLE test; +CREATE TEMPORARY TABLE test (x UInt64, PRIMARY KEY ()) ENGINE = MergeTree SETTINGS ratio_of_defaults_for_sparse_serialization = 0; +INSERT INTO test SELECT number % 2 ? 
number * 123456789 : NULL FROM numbers(10); + +SELECT DISTINCT dumpColumnStructure(*) FROM test; + +SELECT * FROM test ORDER BY ALL DESC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS FIRST LIMIT 1 FORMAT PrettySpace; From e885e9957ed807c67104b773c179bebf8241a4e8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 18 May 2024 14:24:15 +0200 Subject: [PATCH 371/651] tests: fix expected error for 03036_reading_s3_archives Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/03036_reading_s3_archives.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql index 98ca0425174..00d7cc25e1a 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.sql +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -18,5 +18,5 @@ CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: e SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); -CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError INCORRECT_QUERY } -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) \ No newline at end of file +CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) From 2983941ab8652aac128b1e62bc633da70901850d Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 18 May 2024 16:34:24 +0200 Subject: [PATCH 372/651] Fix build for openssl (cherry picked from commit 7efd5d3ab62024619e26dc6c3e28d50bffc98d70) --- contrib/grpc-cmake/CMakeLists.txt | 70 ++++++++++++++++++++++++++-- contrib/openssl-cmake/CMakeLists.txt | 20 ++++++-- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index e7cf42cc87f..41263d65109 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -35,7 +35,7 @@ add_library(ch_contrib::grpc ALIAS _ch_contrib_grpc) # Here we are trying to build a binary tool grpc_cpp_plugin in case of cross-compilation. # We need this file only during compilation process itself so we need it for our "host" # platform, not "target" platform. -# If we are doing normal compilation this file will be produced in grpc.cmake. +# If we are doing normal compilation this executable will be produced in grpc.cmake. # # All code inside this block looks so weird because cmake fundametally doesn't # support different toolchains for different targets. 
So we just running it @@ -45,6 +45,66 @@ add_library(ch_contrib::grpc ALIAS _ch_contrib_grpc) if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME OR NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR) + # First we need to build openssl for host plaform + set(OPENSSL_BUILD_DIR "${_gRPC_BINARY_DIR}/build_openssl") + + set(OPENSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake") + + execute_process( + COMMAND mkdir -p ${OPENSSL_BUILD_DIR} + COMMAND_ECHO STDOUT + ) + + if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") + set (HOST_ARCH_AMD64 1) + elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") + set (HOST_ARCH_AARCH64 1) + elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(powerpc64le.*|ppc64le.*|PPC64LE.*)") + set (HOST_ARCH_PPC64LE 1) + elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(s390x.*|S390X.*)") + set (HOST_ARCH_S390X 1) + elseif (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "riscv64") + set (HOST_ARCH_RISCV64 1) + endif () + + if (CMAKE_HOST_SYSTEM_NAME MATCHES "Linux") + set (HOST_OS_LINUX 1) + elseif (CMAKE_HOST_SYSTEM_NAME MATCHES "Darwin") + set (HOST_OS_DARWIN 1) + endif () + execute_process( + COMMAND ${CMAKE_COMMAND} + "-G${CMAKE_GENERATOR}" + "-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DARCH_AMD64=${HOST_ARCH_AMD64}" + "-DARCH_AARCH64=${HOST_ARCH_AARCH64}" + "-DARCH_PPC64LE=${HOST_ARCH_PPC64LE}" + "-DARCH_S390X=${HOST_ARCH_S390X}" + "-DARCH_RISCV64=${HOST_ARCH_RISCV64}" + "-DOS_DARWIN=${HOST_OS_DARWIN}" + "-OPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION=1" + "-DClickHouse_BINARY_DIR=${ClickHouse_BINARY_DIR}" + "-DClickHouse_SOURCE_DIR=${ClickHouse_SOURCE_DIR}" + "${OPENSSL_SOURCE_DIR}" + WORKING_DIRECTORY "${OPENSSL_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + execute_process( + COMMAND ${CMAKE_COMMAND} --build "${OPENSSL_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + execute_process( + COMMAND ${CMAKE_COMMAND} --install "${OPENSSL_BUILD_DIR}" + COMMAND_ECHO STDOUT) + + # It's not important on which file we depend, we just want to specify right order + add_library(openssl_for_grpc STATIC IMPORTED GLOBAL) + set_target_properties (openssl_for_grpc PROPERTIES IMPORTED_LOCATION "${OPENSSL_BUILD_DIR}/libssl.a") + add_dependencies(openssl_for_grpc "${OPENSSL_BUILD_DIR}/libssl.a") + + # Okay, openssl ready, let's build grpc_cpp_plugin set (GRPC_CPP_PLUGIN_BUILD_DIR "${_gRPC_BINARY_DIR}/build") execute_process( @@ -55,11 +115,12 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set(abseil_source_dir "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf") set(re2_source_dir "${ClickHouse_SOURCE_DIR}/contrib/re2") - set(ssl_source_dir "${ClickHouse_SOURCE_DIR}/contrib/openssl") + set(ssl_source_dir "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake") set(zlib_source_dir "${ClickHouse_SOURCE_DIR}/contrib/zlib-ng") # For some reason config exists only in this directory set(zlib_config_source_dir "${ClickHouse_BINARY_DIR}/contrib/zlib-ng-cmake") set(cares_source_dir "${ClickHouse_SOURCE_DIR}/contrib/c-ares") + execute_process( COMMAND ${CMAKE_COMMAND} "-G${CMAKE_GENERATOR}" @@ -73,8 +134,10 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME "-DPROTOBUF_ROOT_DIR=${protobuf_source_dir}" "-DRE2_ROOT_DIR=${re2_source_dir}" "-DCARES_ROOT_DIR=${cares_source_dir}" - "-DBORINGSSL_ROOT_DIR=${ssl_source_dir}" + "-DOPENSSL_ROOT_DIR=${OPENSSL_BUILD_DIR}" + 
"-DOPENSSL_INCLUDE_DIR=${OPENSSL_BUILD_DIR}/include" "-DZLIB_ROOT_DIR=${zlib_source_dir}" + "-DgRPC_SSL_PROVIDER=package" "${_gRPC_SOURCE_DIR}" WORKING_DIRECTORY "${GRPC_CPP_PLUGIN_BUILD_DIR}" COMMAND_ECHO STDOUT) @@ -86,4 +149,5 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME add_executable(grpc_cpp_plugin IMPORTED GLOBAL) set_target_properties (grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") add_dependencies(grpc_cpp_plugin "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") + add_dependencies(grpc_cpp_plugin openssl_for_grpc) endif() diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt index 021c88bcb04..fba1b2edb31 100644 --- a/contrib/openssl-cmake/CMakeLists.txt +++ b/contrib/openssl-cmake/CMakeLists.txt @@ -32,9 +32,16 @@ set(OPENSSLDIR "/etc/ssl" CACHE PATH "Set the default openssl directory") set(OPENSSL_ENGINESDIR "/usr/local/lib/engines-3" CACHE PATH "Set the default openssl directory for engines") set(OPENSSL_MODULESDIR "/usr/local/lib/ossl-modules" CACHE PATH "Set the default openssl directory for modules") -add_definitions(-DOPENSSL_NO_KTLS -DOPENSSLDIR="${OPENSSLDIR}" -DENGINESDIR="${OPENSSL_ENGINESDIR}" -DMODULESDIR="${OPENSSL_MODULESDIR}" -DOPENSSL_USE_NODELETE -DOPENSSL_PIC) -target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations") -target_compile_options(global-group INTERFACE "-Wno-poison-system-directories") +# special type of build during cross-compilation +if(OPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION) + add_definitions(-DOPENSSL_NO_KTLS -DOPENSSLDIR="\\\"${OPENSSLDIR}\\\"" -DENGINESDIR="\\\"${OPENSSL_ENGINESDIR}\\\"" -DMODULESDIR="\\\"${OPENSSL_MODULESDIR}\\\"" -DOPENSSL_USE_NODELETE -DOPENSSL_PIC) + add_compile_options("-Wno-deprecated-declarations") + add_compile_options("-Wno-poison-system-directories") +else() + add_definitions(-DOPENSSL_NO_KTLS -DOPENSSLDIR="${OPENSSLDIR}" -DENGINESDIR="${OPENSSL_ENGINESDIR}" -DMODULESDIR="${OPENSSL_MODULESDIR}" -DOPENSSL_USE_NODELETE -DOPENSSL_PIC) + target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations") + target_compile_options(global-group INTERFACE "-Wno-poison-system-directories") +endif() if(ARCH_AMD64) if(OS_DARWIN) @@ -1447,4 +1454,9 @@ target_link_libraries(ssl crypto) add_library(OpenSSL::Crypto ALIAS crypto) add_library(OpenSSL::SSL ALIAS ssl) -install(FILES openssl.conf fipsmodule.conf DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) +if(OPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION) + install(DIRECTORY "${PLATFORM_DIRECTORY}/include" DESTINATION "${CMAKE_BINARY_DIR}") + install(DIRECTORY "${OPENSSL_SOURCE_DIR}/include" DESTINATION "${CMAKE_BINARY_DIR}") +else() + install(FILES openssl.conf fipsmodule.conf DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) +endif() From 3743b4a2eda43c877dfd077448874b5b5904eb5c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 18 May 2024 16:39:56 +0200 Subject: [PATCH 373/651] Fix sanitizers build by updating llvm/clang that includes fix for it Rebuild for clang 18.1.3, that contains a workaround [1] for sanitizers issue [2]: $ git tag --contains c2a57034eff048cd36c563c8e0051db3a70991b3 | tail -1 llvmorg-18.1.3 [1]: https://github.com/llvm/llvm-project/commit/c2a57034eff048cd36c563c8e0051db3a70991b3 [2]: https://github.com/ClickHouse/ClickHouse/issues/64086 Since right now version is not enough: $ docker run --rm -it clickhouse/test-util llvm-nm-18 --version llvm-nm, compatible with GNU nm Ubuntu LLVM 
version 18.1.2 Optimized build. But I don't see any fix for TSan, only MSan, but let's try. Signed-off-by: Azat Khuzhin --- docker/test/fasttest/Dockerfile | 3 +-- docker/test/util/Dockerfile | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 912ff191e57..e0be261d5e8 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -1,5 +1,4 @@ -# rebuild in #33610 -# docker build -t clickhouse/fasttest . +# docker build -t clickhouse/fasttest . ARG FROM_TAG=latest FROM clickhouse/test-util:$FROM_TAG diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index bbd473b3b7a..f13bb576f79 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -5,6 +5,14 @@ FROM ubuntu:22.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list +# FIXME: rebuild for clang 18.1.3, that contains a workaround [1] for +# sanitizers issue [2]: +# +# $ git tag --contains c2a57034eff048cd36c563c8e0051db3a70991b3 | tail -1 +# llvmorg-18.1.3 +# +# [1]: https://github.com/llvm/llvm-project/commit/c2a57034eff048cd36c563c8e0051db3a70991b3 +# [2]: https://github.com/ClickHouse/ClickHouse/issues/64086 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18 RUN apt-get update \ From 086e5d73a48bf5c0c166ee6aa89c9058a13d0877 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 18 May 2024 18:03:55 +0200 Subject: [PATCH 374/651] Followup --- contrib/grpc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 41263d65109..1c0bf41ff78 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -84,7 +84,7 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME "-DARCH_S390X=${HOST_ARCH_S390X}" "-DARCH_RISCV64=${HOST_ARCH_RISCV64}" "-DOS_DARWIN=${HOST_OS_DARWIN}" - "-OPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION=1" + "-DOPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION=1" "-DClickHouse_BINARY_DIR=${ClickHouse_BINARY_DIR}" "-DClickHouse_SOURCE_DIR=${ClickHouse_SOURCE_DIR}" "${OPENSSL_SOURCE_DIR}" From 9ba21335e4b4d157f4b1de884e87ef84e917dc62 Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 18 May 2024 12:20:24 -0400 Subject: [PATCH 375/651] fix test --- .../0_stateless/02884_create_view_with_sql_security_option.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index 62b03b5d5ff..a9a306a9e27 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -197,18 +197,21 @@ CREATE TABLE $db.source ( a UInt64 ) +ENGINE = MergeTree ORDER BY a; CREATE TABLE $db.destination1 ( `a` UInt64 ) +ENGINE = MergeTree ORDER BY a; CREATE TABLE $db.destination2 ( `a` UInt64 ) +ENGINE = MergeTree ORDER BY a; CREATE MATERIALIZED VIEW $db.mv1 TO $db.destination1 From e3329a10f92f429d70ba19ba9bf72e0279d43668 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 18 May 2024 18:00:32 +0200 Subject: [PATCH 376/651] CI: mergeable check redesign --- .github/workflows/pull_request.yml | 27 ++++++++++++++++++++++++++- tests/ci/ci_config.py | 4 +++- tests/ci/commit_status_helper.py | 9 ++++++++- tests/ci/merge_pr.py | 19 ++++++++++++++++++- 4 files changed, 55 insertions(+), 4 
deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 9f16e32707e..e0eda476d19 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -130,6 +130,14 @@ jobs: with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} + # stage for jobs that do not prohibit merge + Tests_3: + needs: [RunConfig, Tests_1, Tests_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} + uses: ./.github/workflows/reusable_test_stage.yml + with: + stage: Tests_3 + data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# # Reports should by run even if Builds_1/2 fail, so put them separatly in wf (not in Tests_1/2) @@ -156,15 +164,32 @@ jobs: runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} + CheckReadyForMerge: + if: ${{ !failure() && !cancelled() }} + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + filter: tree:0 + - name: Check and set merge status + if: ${{ github.event_name != 'merge_group' }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 merge_pr.py --set-status + ################################# Stage Final ################################# # FinishCheck: if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3, CheckReadyForMerge] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 + with: + filter: tree:0 - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 588f4934125..6d450a79a69 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -25,6 +25,7 @@ class CIStages(metaclass=WithIter): BUILDS_2 = "Builds_2" TESTS_1 = "Tests_1" TESTS_2 = "Tests_2" + TESTS_3 = "Tests_3" class Runners(metaclass=WithIter): @@ -579,7 +580,6 @@ class CIConfig: elif job_name == JobNames.BUILD_CHECK_SPECIAL: stage_type = CIStages.TESTS_2 elif self.is_test_job(job_name): - stage_type = CIStages.TESTS_1 if job_name in CI_CONFIG.test_configs: required_build = CI_CONFIG.test_configs[job_name].required_build assert required_build @@ -591,6 +591,8 @@ class CIConfig: stage_type = CIStages.TESTS_2 else: stage_type = CIStages.TESTS_1 + if job_name not in REQUIRED_CHECKS: + stage_type = CIStages.TESTS_3 assert stage_type, f"BUG [{job_name}]" return stage_type diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 0b51d98b479..0ca25f39976 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -469,7 +469,10 @@ def update_mergeable_check( def trigger_mergeable_check( - commit: Commit, statuses: CommitStatuses, hide_url: bool = False + commit: Commit, + statuses: CommitStatuses, + hide_url: bool = False, + set_if_green: bool = False, ) -> CommitStatus: """calculate and update StatusNames.MERGEABLE""" required_checks = [ @@ -502,6 +505,10 @@ def trigger_mergeable_check( state = FAILURE description = format_description(description) + 
if not set_if_green and state == SUCCESS: + # do not set green Mergeable Check status + return SUCCESS + if mergeable_status is None or mergeable_status.description != description: return set_mergeable_check(commit, description, state, hide_url) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 450ece62d4b..519fa5fcebb 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -13,7 +13,11 @@ from github.PaginatedList import PaginatedList from github.PullRequestReview import PullRequestReview from github.WorkflowRun import WorkflowRun -from commit_status_helper import get_commit_filtered_statuses +from commit_status_helper import ( + get_commit_filtered_statuses, + get_commit, + trigger_mergeable_check, +) from get_robot_token import get_best_robot_token from github_helper import GitHub, NamedUser, PullRequest, Repository from pr_info import PRInfo @@ -173,6 +177,11 @@ def parse_args() -> argparse.Namespace: action="store_true", help="if set, the script won't merge the PR, just check the conditions", ) + parser.add_argument( + "--set-ci-status", + action="store_true", + help="if set, only update/set Mergeable Check status", + ) parser.add_argument( "--check-approved", action="store_true", @@ -226,6 +235,14 @@ def main(): token = args.token or get_best_robot_token() gh = GitHub(token) repo = gh.get_repo(args.repo) + + if args.set_ci_status: + # set mergeable check status and exit + commit = get_commit(gh, args.pr_info.sha) + statuses = get_commit_filtered_statuses(commit) + trigger_mergeable_check(commit, statuses, hide_url=False, set_if_green=True) + return + # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 # pylint: disable=protected-access From d5eac97d458c0177a6d3d4bb2b603ef1d14feed1 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 18 May 2024 19:13:34 +0200 Subject: [PATCH 377/651] remove update_mergeable_check_from_ci.py --- .github/workflows/pull_request.yml | 1 - tests/ci/ci.py | 40 +----------------------------- tests/ci/commit_status_helper.py | 16 +++++------- 3 files changed, 7 insertions(+), 50 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index e0eda476d19..a9570bc2674 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -174,7 +174,6 @@ jobs: with: filter: tree:0 - name: Check and set merge status - if: ${{ github.event_name != 'merge_group' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-status diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 08048564383..3aa8f1bb813 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -17,7 +17,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames, StatusNames +from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames from ci_utils import GHActions, is_hex, normalize_string from clickhouse_helper import ( CiLogsCredentials, @@ -34,16 +34,12 @@ from commit_status_helper import ( get_commit, post_commit_status, set_status_comment, - update_mergeable_check, - update_upstream_sync_status, ) from digest_helper import DockerDigester, JobDigester from env_helper import ( CI, GITHUB_JOB_API_URL, - GITHUB_REPOSITORY, GITHUB_RUN_URL, - GITHUB_UPSTREAM_REPOSITORY, REPO_COPY, REPORT_PATH, S3_BUILDS_BUCKET, @@ -56,7 +52,6 @@ from 
github_helper import GitHub from pr_info import PRInfo from report import ERROR, SUCCESS, BuildResult, JobReport from s3_helper import S3Helper -from synchronizer_utils import SYNC_BRANCH_PREFIX from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -2189,39 +2184,6 @@ def main() -> int: pr_info, dump_to_file=True, ) - if not pr_info.is_merge_queue: - # in the merge queue mergeable status must be set only in FinishCheck (last job in wf) - mergeable_status = update_mergeable_check( - commit, - pr_info, - job_report.check_name or _get_ext_check_name(args.job_name), - ) - - # Process upstream StatusNames.SYNC - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and mergeable_status - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY - ): - upstream_pr_number = int( - pr_info.head_ref.split("/pr/", maxsplit=1)[1] - ) - update_upstream_sync_status( - upstream_pr_number, pr_info.number, gh, mergeable_status - ) - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [], - job_report.status, - 0, - job_report.start_time, - f"https://github.com/ClickHouse/ClickHouse/pull/{upstream_pr_number}", - StatusNames.SYNC, - ) - prepared_events[0]["test_context_raw"] = args.job_name - ch_helper.insert_events_into( - db="default", table="checks", events=prepared_events - ) print(f"Job report url: [{check_url}]") prepared_events = prepare_tests_results_for_clickhouse( diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 0ca25f39976..2ba6fba8b83 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -447,9 +447,7 @@ def set_mergeable_check( ) -def update_mergeable_check( - commit: Commit, pr_info: PRInfo, check_name: str -) -> Optional[CommitStatus]: +def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None: "check if the check_name in REQUIRED_CHECKS and then trigger update" not_run = ( pr_info.labels.intersection({Labels.SKIP_MERGEABLE_CHECK, Labels.RELEASE}) @@ -460,12 +458,12 @@ def update_mergeable_check( if not_run: # Let's avoid unnecessary work - return None + return logging.info("Update Mergeable Check by %s", check_name) statuses = get_commit_filtered_statuses(commit) - return trigger_mergeable_check(commit, statuses) + trigger_mergeable_check(commit, statuses) def trigger_mergeable_check( @@ -473,7 +471,7 @@ def trigger_mergeable_check( statuses: CommitStatuses, hide_url: bool = False, set_if_green: bool = False, -) -> CommitStatus: +) -> None: """calculate and update StatusNames.MERGEABLE""" required_checks = [ status for status in statuses if status.context in REQUIRED_CHECKS @@ -507,12 +505,10 @@ def trigger_mergeable_check( if not set_if_green and state == SUCCESS: # do not set green Mergeable Check status - return SUCCESS + return if mergeable_status is None or mergeable_status.description != description: - return set_mergeable_check(commit, description, state, hide_url) - - return mergeable_status + set_mergeable_check(commit, description, state, hide_url) def update_upstream_sync_status( From 4e4d078786a6a7a22b1c6d2190c8a414614cafd2 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 18 May 2024 20:36:04 +0200 Subject: [PATCH 378/651] return sync pr status to upstream from FinishCheck job --- .github/workflows/pull_request.yml | 4 ++-- tests/ci/commit_status_helper.py | 18 ++++++++++-------- tests/ci/finish_check.py | 13 ++++++++++++- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pull_request.yml 
b/.github/workflows/pull_request.yml index a9570bc2674..f2e4b5f328d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -165,7 +165,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] runs-on: [self-hosted, style-checker-aarch64] steps: @@ -176,7 +176,7 @@ jobs: - name: Check and set merge status run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 merge_pr.py --set-status + python3 merge_pr.py --set-ci-status ################################# Stage Final ################################# # diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 2ba6fba8b83..ec9746e9af9 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -471,7 +471,7 @@ def trigger_mergeable_check( statuses: CommitStatuses, hide_url: bool = False, set_if_green: bool = False, -) -> None: +) -> StatusType: """calculate and update StatusNames.MERGEABLE""" required_checks = [ status for status in statuses if status.context in REQUIRED_CHECKS @@ -505,17 +505,19 @@ def trigger_mergeable_check( if not set_if_green and state == SUCCESS: # do not set green Mergeable Check status - return + pass + else: + if mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description, state, hide_url) - if mergeable_status is None or mergeable_status.description != description: - set_mergeable_check(commit, description, state, hide_url) + return state def update_upstream_sync_status( upstream_pr_number: int, sync_pr_number: int, gh: Github, - mergeable_status: CommitStatus, + state: StatusType, ) -> None: upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) upstream_pr = upstream_repo.get_pull(upstream_pr_number) @@ -546,19 +548,19 @@ def update_upstream_sync_status( ) return - sync_status = get_status(mergeable_status.state) + sync_status = get_status(state) logging.info( "Using commit %s to post the %s status `%s`: [%s]", upstream_commit.sha, sync_status, StatusNames.SYNC, - mergeable_status.description, + "", ) post_commit_status( upstream_commit, sync_status, "", # let's won't expose any urls from cloud - mergeable_status.description, + "", StatusNames.SYNC, ) trigger_mergeable_check( diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index a66ebbeadf4..615b26b51f0 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -11,10 +11,13 @@ from commit_status_helper import ( post_commit_status, set_mergeable_check, trigger_mergeable_check, + update_upstream_sync_status, ) from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import PENDING, SUCCESS +from synchronizer_utils import SYNC_BRANCH_PREFIX +from tests.ci.env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY def main(): @@ -40,7 +43,15 @@ def main(): set_mergeable_check(commit, "workflow passed", "success") else: statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check(commit, statuses) + state = trigger_mergeable_check(commit, statuses, set_if_green=True) + + # Process upstream StatusNames.SYNC + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + upstream_pr_number = int(pr_info.head_ref.split("/pr/", maxsplit=1)[1]) + 
update_upstream_sync_status(upstream_pr_number, pr_info.number, gh, state) statuses = [s for s in statuses if s.context == StatusNames.CI] if not statuses: From 3a79b1facc63aa9ae3a8deb986bd00cf51c14c1f Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 18 May 2024 17:15:01 -0400 Subject: [PATCH 379/651] fix test --- .../0_stateless/02884_create_view_with_sql_security_option.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index a9a306a9e27..f1da343da36 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -202,14 +202,14 @@ ORDER BY a; CREATE TABLE $db.destination1 ( - `a` UInt64 + a UInt64 ) ENGINE = MergeTree ORDER BY a; CREATE TABLE $db.destination2 ( - `a` UInt64 + a UInt64 ) ENGINE = MergeTree ORDER BY a; From 79b3f52dc5189d6def125cf5ed9b1fb2e37267e4 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 18 May 2024 23:18:41 +0000 Subject: [PATCH 380/651] only interpolate expression should be used for DAG --- src/Planner/PlannerExpressionAnalysis.cpp | 7 +++---- .../03155_analyzer_interpolate.reference | 13 +++++++++++++ .../0_stateless/03155_analyzer_interpolate.sql | 7 +++++++ 3 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03155_analyzer_interpolate.reference create mode 100644 tests/queries/0_stateless/03155_analyzer_interpolate.sql diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 6e194b2c03e..6ff56f36933 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -439,20 +439,19 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, auto & interpolate_list_node = query_node.getInterpolate()->as(); PlannerActionsVisitor interpolate_actions_visitor(planner_context); - auto interpolate_actions_dag = std::make_shared(); + auto interpolate_expression_dag = std::make_shared(); for (auto & interpolate_node : interpolate_list_node.getNodes()) { auto & interpolate_node_typed = interpolate_node->as(); - interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); - interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); + interpolate_actions_visitor.visit(interpolate_expression_dag, interpolate_node_typed.getInterpolateExpression()); } std::unordered_map before_sort_actions_inputs_name_to_node; for (const auto & node : before_sort_actions->getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); - for (const auto & node : interpolate_actions_dag->getNodes()) + for (const auto & node : interpolate_expression_dag->getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || node.type != ActionsDAG::ActionType::INPUT) diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.reference b/tests/queries/0_stateless/03155_analyzer_interpolate.reference new file mode 100644 index 00000000000..791aaa5b2a2 --- /dev/null +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.reference @@ -0,0 +1,13 @@ +0 [5] +0.5 [5] +1 [1] +1.5 [5] +2 [5] +2.5 [5] +3 [5] +3.5 [5] +4 [4] +4.5 [5] +5 [5] +5.5 [5] +7 [7] diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.sql 
b/tests/queries/0_stateless/03155_analyzer_interpolate.sql new file mode 100644 index 00000000000..9b56106f2b4 --- /dev/null +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/62464 +SET allow_experimental_analyzer = 1; + +SELECT n, [number] as inter FROM ( + SELECT toFloat32(number % 10) AS n, number + FROM numbers(10) WHERE number % 3 = 1 +) group by n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS [5]); From 6ed515554befd46cfeaed82ddea1231d2dfae937 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 May 2024 15:50:30 +0200 Subject: [PATCH 381/651] Revert "CI: disable ARM integration test cases with libunwind crash" --- tests/integration/test_crash_log/test.py | 8 -------- tests/integration/test_send_crash_reports/test.py | 4 ---- 2 files changed, 12 deletions(-) diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py index fe24777de94..a5b82039a84 100644 --- a/tests/integration/test_crash_log/test.py +++ b/tests/integration/test_crash_log/test.py @@ -39,10 +39,6 @@ def wait_for_clickhouse_stop(started_node): assert result == "OK", "ClickHouse process is still running" -@pytest.mark.skipif( - helpers.cluster.is_arm(), - reason="Fails on ARM, issue https://github.com/ClickHouse/ClickHouse/issues/63855", -) def test_pkill(started_node): if ( started_node.is_built_with_thread_sanitizer() @@ -63,10 +59,6 @@ def test_pkill(started_node): ) -@pytest.mark.skipif( - helpers.cluster.is_arm(), - reason="Fails on ARM, issue https://github.com/ClickHouse/ClickHouse/issues/63855", -) def test_pkill_query_log(started_node): for signal in ["SEGV", "4"]: # force create query_log if it was not created diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py index 15a15a13e2f..83c0827f891 100644 --- a/tests/integration/test_send_crash_reports/test.py +++ b/tests/integration/test_send_crash_reports/test.py @@ -35,10 +35,6 @@ def started_node(): pass -@pytest.mark.skipif( - helpers.cluster.is_arm(), - reason="Fails on ARM, issue https://github.com/ClickHouse/ClickHouse/issues/63855", -) def test_send_segfault(started_node): # NOTE: another option is to increase waiting time. if ( From 81a0c63928bb7608b55c1fcf5f0335c47e459a2b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 May 2024 17:58:42 +0200 Subject: [PATCH 382/651] Fix unwind on SIGSEGV on aarch64 (due to small stack for signal) Only SIGSEGV uses alternative stack (sigaltstack()), which is very small, 16K, and for aarch64 it is likely not enough for unwinding (likely due to lots of registers on this platform): (gdb) bt #0 libunwind::CFI_Parser::parseFDEInstructions (addressSpace=..., fdeInfo=..., cieInfo=..., upToPC=, arch=4, results=) at ./contrib/libunwind/src/DwarfParser.hpp:561 And this is: 554 case DW_CFA_remember_state: { 555 // Avoid operator new because that would be an upward dependency. 556 // Avoid malloc because it needs heap allocation. 
557 PrologInfoStackEntry *entry = 558 (PrologInfoStackEntry *)_LIBUNWIND_REMEMBER_ALLOC( 559 sizeof(PrologInfoStackEntry)); 560 if (entry != NULL) { 561 entry->next = rememberStack.entry; ^^^ 562 entry->info = *results; 563 rememberStack.entry = entry; 564 } else { 565 return false; 566 } 567 _LIBUNWIND_TRACE_DWARF("DW_CFA_remember_state\n"); 568 break; 569 } Signed-off-by: Azat Khuzhin --- src/Common/ThreadStatus.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index ad96018a17e..f2930513280 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -23,6 +23,13 @@ thread_local ThreadStatus constinit * current_thread = nullptr; namespace { +#if defined(__aarch64__) +/// For aarch64 16K is not enough (likely due to tons of registers) +static constexpr size_t UNWIND_MINSIGSTKSZ = 32 << 10; +#else +static constexpr size_t UNWIND_MINSIGSTKSZ = 16 << 10; +#endif + /// Alternative stack for signal handling. /// /// This stack should not be located in the TLS (thread local storage), since: @@ -50,7 +57,7 @@ struct ThreadStack free(data); } - static size_t getSize() { return std::max(16 << 10, MINSIGSTKSZ); } + static size_t getSize() { return std::max(UNWIND_MINSIGSTKSZ, MINSIGSTKSZ); } void * getData() const { return data; } private: From 714420fc6713d8e1f1a6af29bd37ad932d86059f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 09:00:35 +0200 Subject: [PATCH 383/651] Speed up Set index a little --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 74 ++++++-------------- src/Storages/MergeTree/MergeTreeIndexSet.h | 1 - 2 files changed, 23 insertions(+), 52 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 1bd42518fdd..0b7e2e1f942 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -35,8 +35,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( size_t max_rows_) : index_name(index_name_) , max_rows(max_rows_) - , index_sample_block(index_sample_block_) - , block(index_sample_block) + , block(index_sample_block_) { } @@ -47,8 +46,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( MutableColumns && mutable_columns_) : index_name(index_name_) , max_rows(max_rows_) - , index_sample_block(index_sample_block_) - , block(index_sample_block.cloneWithColumns(std::move(mutable_columns_))) + , block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_))) { } @@ -67,10 +65,11 @@ void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const } size_serialization->serializeBinary(size(), ostr, {}); + size_t num_columns = block.columns(); - for (size_t i = 0; i < index_sample_block.columns(); ++i) + for (size_t i = 0; i < num_columns; ++i) { - const auto & type = index_sample_block.getByPosition(i).type; + const auto & type = block.getByPosition(i).type; ISerialization::SerializeBinaryBulkSettings settings; settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; }; @@ -92,8 +91,6 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd if (version != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version); - block.clear(); - Field field_rows; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr, {}); @@ -102,24 +99,22 @@ void 
MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd if (rows_to_read == 0) return; - for (size_t i = 0; i < index_sample_block.columns(); ++i) + size_t num_columns = block.columns(); + + ISerialization::DeserializeBinaryBulkSettings settings; + settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; + settings.position_independent_encoding = false; + + for (size_t i = 0; i < num_columns; ++i) { - const auto & column = index_sample_block.getByPosition(i); - const auto & type = column.type; - ColumnPtr new_column = type->createColumn(); - - - ISerialization::DeserializeBinaryBulkSettings settings; - settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; - settings.position_independent_encoding = false; + auto & elem = block.getByPosition(i); + elem.column = elem.column->cloneEmpty(); ISerialization::DeserializeBinaryBulkStatePtr state; - auto serialization = type->getDefaultSerialization(); + auto serialization = elem.type->getDefaultSerialization(); serialization->deserializeBinaryBulkStatePrefix(settings, state); - serialization->deserializeBinaryBulkWithMultipleStreams(new_column, rows_to_read, settings, state, nullptr); - - block.insert(ColumnWithTypeAndName(new_column, type, column.name)); + serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr); } } @@ -284,42 +279,19 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx if (isUseless()) return true; - auto granule = std::dynamic_pointer_cast(idx_granule); - if (!granule) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Set index condition got a granule with the wrong type"); + const MergeTreeIndexGranuleSet & granule = assert_cast(*idx_granule); - if (isUseless() || granule->empty() || (max_rows != 0 && granule->size() > max_rows)) + size_t size = granule.size(); + if (size == 0 || (max_rows != 0 && size > max_rows)) return true; - Block result = granule->block; + Block result = granule.block; actions->execute(result); - const auto & filter_node_name = actions->getActionsDAG().getOutputs().at(0)->result_name; - auto column = result.getByName(filter_node_name).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); + const auto & column = result.getByPosition(result.columns() - 1).column; - if (column->onlyNull()) - return false; - - const auto * col_uint8 = typeid_cast(column.get()); - - const NullMap * null_map = nullptr; - - if (const auto * col_nullable = checkAndGetColumn(&*column)) - { - col_uint8 = typeid_cast(&col_nullable->getNestedColumn()); - null_map = &col_nullable->getNullMapData(); - } - - if (!col_uint8) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "ColumnUInt8 expected as Set index condition result"); - - const auto & condition = col_uint8->getData(); - size_t column_size = column->size(); - - for (size_t i = 0; i < column_size; ++i) - if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1) + for (size_t i = 0; i < size; ++i) + if (column->getBool(i)) return true; return false; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 7c66ba1a867..3348b5fbe34 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -34,7 +34,6 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const String index_name; const size_t max_rows; - const Block index_sample_block; Block block; }; From 
a67418bcc8abb685a1c0271f8f34d5434bb0a113 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 19 May 2024 07:14:37 +0000 Subject: [PATCH 384/651] add NOT_AN_AGGREGATE exception for interpolate expression columns --- src/Planner/PlannerExpressionAnalysis.cpp | 16 ++++++++++++++-- .../0_stateless/03155_analyzer_interpolate.sql | 9 +++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 6ff56f36933..e7d553af944 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -28,6 +28,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int NOT_AN_AGGREGATE; } namespace @@ -397,7 +398,8 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, SortAnalysisResult analyzeSort(const QueryNode & query_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, - ActionsChain & actions_chain) + ActionsChain & actions_chain, + std::optional aggregation_analysis_result_optional) { ActionsDAGPtr before_sort_actions = std::make_shared(input_columns); auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); @@ -451,6 +453,10 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (const auto & node : before_sort_actions->getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); + std::unordered_set aggregation_keys; + if (aggregation_analysis_result_optional) + aggregation_keys.insert(aggregation_analysis_result_optional->aggregation_keys.begin(), aggregation_analysis_result_optional->aggregation_keys.end()); + for (const auto & node : interpolate_expression_dag->getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || @@ -466,6 +472,12 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, input_node_it = it; } + if (aggregation_analysis_result_optional) + if (!aggregation_keys.contains(node.result_name)) + throw Exception(ErrorCodes::NOT_AN_AGGREGATE, + "Column {} is not under aggregate function and not in GROUP BY keys. 
In query {}", + node.result_name, query_node.formatASTForErrorMessage()); + before_sort_actions_outputs.push_back(input_node_it->second); before_sort_actions_dag_output_node_names.insert(node.result_name); } @@ -567,7 +579,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo std::optional sort_analysis_result_optional; if (query_node.hasOrderBy()) { - sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain); + sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain, aggregation_analysis_result_optional); current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); } diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.sql b/tests/queries/0_stateless/03155_analyzer_interpolate.sql index 9b56106f2b4..b3c1d233f47 100644 --- a/tests/queries/0_stateless/03155_analyzer_interpolate.sql +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.sql @@ -1,7 +1,12 @@ -- https://github.com/ClickHouse/ClickHouse/issues/62464 SET allow_experimental_analyzer = 1; -SELECT n, [number] as inter FROM ( +SELECT n, [number] AS inter FROM ( SELECT toFloat32(number % 10) AS n, number FROM numbers(10) WHERE number % 3 = 1 -) group by n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS [5]); +) GROUP BY n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS [5]); + +SELECT n, number+5 AS inter FROM ( -- { serverError NOT_AN_AGGREGATE } + SELECT toFloat32(number % 10) AS n, number, number*2 AS mn + FROM numbers(10) WHERE number % 3 = 1 +) GROUP BY n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS mn * 2); From f065128ef2d67dfa4709f5d783d3c5a33b6f1e42 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 07:16:07 +0000 Subject: [PATCH 385/651] Fix style --- src/Compression/CompressionCodecDoubleDelta.cpp | 5 +++++ src/Coordination/KeeperServer.cpp | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 443b9d33532..cbd8cd57a62 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -21,6 +21,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + /** NOTE DoubleDelta is surprisingly bad name. The only excuse is that it comes from an academic paper. * Most people will think that "double delta" is just applying delta transform twice. * But in fact it is something more than applying delta transform twice. 
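
The convention behind this style fix, as a minimal sketch (the function and message below are illustrative, not part of the patch set): ClickHouse defines all error codes once in src/Common/ErrorCodes.cpp, and every translation unit that throws a code re-declares it as extern in a local ErrorCodes namespace. The automated style check flags both codes used without such a declaration (the CompressionCodecDoubleDelta.cpp change above) and declarations that are no longer used (the KeeperServer.cpp change below):

namespace DB
{

namespace ErrorCodes
{
    /// Re-declaration only; the value itself lives in Common/ErrorCodes.cpp.
    extern const int BAD_ARGUMENTS;
}

void validateCompressedInput(size_t source_size)
{
    /// Throwing a code without the extern declaration above would fail check-style.
    if (source_size == 0)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot decompress an empty buffer");
}

}
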
diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp
index 953072c5b0e..b07c90b8660 100644
--- a/src/Coordination/KeeperServer.cpp
+++ b/src/Coordination/KeeperServer.cpp
@@ -45,7 +45,6 @@ namespace ErrorCodes
     extern const int SUPPORT_IS_DISABLED;
     extern const int LOGICAL_ERROR;
     extern const int INVALID_CONFIG_PARAMETER;
-    extern const int UNEXPECTED_ZOOKEEPER_ERROR;
 }
 
 using namespace std::chrono_literals;

From 500475f2b81e74276f6316e710ff7313244928e0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 19 May 2024 10:45:05 +0200
Subject: [PATCH 386/651] Add a test

---
 tests/performance/set_index_analysis.xml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 tests/performance/set_index_analysis.xml

diff --git a/tests/performance/set_index_analysis.xml b/tests/performance/set_index_analysis.xml
new file mode 100644
index 00000000000..64d0af6690b
--- /dev/null
+++ b/tests/performance/set_index_analysis.xml
@@ -0,0 +1,14 @@
+<test>
+    <create_query>
+        CREATE TABLE test_set (k UInt32, x UInt32, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 111;
+    </create_query>
+    <fill_query>SYSTEM STOP MERGES</fill_query>
+    <fill_query>INSERT INTO test_set SELECT number, number DIV 100 + rand() % 7 FROM numbers(3000000) SETTINGS max_insert_threads = 4;</fill_query>
+
+    <query>
+        SELECT count() FROM test_set WHERE x = 1234 SETTINGS max_threads = 8;
+    </query>
+
+    <drop_query>SYSTEM START MERGES</drop_query>
+    <drop_query>DROP TABLE IF EXISTS test_set</drop_query>
+</test>

From 2a9795f4e39e6b8e2ef0aee3d2e97f396416662e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 19 May 2024 10:45:19 +0200
Subject: [PATCH 387/651] Minor changes

---
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index de769c59d33..949807bb88b 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1296,8 +1296,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
     size_t last_index_mark = 0;
 
     PostingsCacheForStore cache_in_store;
-
-    if (dynamic_cast(&*index_helper) != nullptr)
+    if (dynamic_cast(index_helper.get()))
         cache_in_store.store = GinIndexStoreFactory::instance().get(index_helper->getFileName(), part->getDataPartStoragePtr());
 
     for (size_t i = 0; i < ranges.size(); ++i)
@@ -1315,12 +1314,12 @@
         auto ann_condition = std::dynamic_pointer_cast(condition);
         if (ann_condition != nullptr)
         {
-            // vector of indexes of useful ranges
+            /// An array of indices of useful ranges.
             auto result = ann_condition->getUsefulRanges(granule);
 
             for (auto range : result)
             {
-                // range for corresponding index
+                /// The range for the corresponding index.
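                /// Each index granule covers `index_granularity` data granules, so a row
                /// range `range` reported for index granule `index_mark` corresponds to
                /// the data-mark interval [index_mark * index_granularity + range,
                ///                         index_mark * index_granularity + range + 1),
                /// clamped below to the portion that intersects ranges[i]: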
MarkRange data_range( std::max(ranges[i].begin, index_mark * index_granularity + range), std::min(ranges[i].end, index_mark * index_granularity + range + 1)); @@ -1344,8 +1343,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( continue; MarkRange data_range( - std::max(ranges[i].begin, index_mark * index_granularity), - std::min(ranges[i].end, (index_mark + 1) * index_granularity)); + std::max(ranges[i].begin, index_mark * index_granularity), + std::min(ranges[i].end, (index_mark + 1) * index_granularity)); if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek) res.push_back(data_range); From 2909e6451b6e0118679b2b96b1c3e26e1e226870 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 08:02:06 +0000 Subject: [PATCH 388/651] Move StringUtils.h/cpp back to Common/ --- programs/CMakeLists.txt | 2 +- programs/client/CMakeLists.txt | 1 - programs/format/Format.cpp | 2 +- programs/git-import/git-import.cpp | 2 +- programs/keeper/clickhouse-keeper.cpp | 2 +- .../library-bridge/ExternalDictionaryLibraryUtils.h | 2 +- programs/main.cpp | 2 +- programs/obfuscator/Obfuscator.cpp | 2 +- programs/odbc-bridge/validateODBCConnectionString.cpp | 2 +- programs/server/CMakeLists.txt | 1 - src/Access/User.cpp | 2 +- src/Access/UsersConfigAccessStorage.cpp | 2 +- .../Combinators/AggregateFunctionCombinatorFactory.cpp | 2 +- src/Backups/BackupImpl.cpp | 2 +- src/Bridge/IBridge.cpp | 2 +- src/CMakeLists.txt | 4 ---- src/Client/ClientBase.cpp | 2 +- src/Client/Connection.cpp | 2 +- src/Common/CMakeLists.txt | 2 -- src/Common/Config/CMakeLists.txt | 4 ---- src/Common/Config/ConfigProcessor.cpp | 2 +- src/Common/FrequencyHolder.h | 2 +- src/Common/HTTPHeaderFilter.cpp | 2 +- src/Common/ProxyConfigurationResolverProvider.cpp | 2 +- src/Common/ProxyListConfigurationResolver.cpp | 2 +- src/Common/RemoteHostFilter.cpp | 2 +- src/Common/Scheduler/Nodes/DynamicResourceManager.cpp | 2 +- src/Common/SensitiveDataMasker.cpp | 2 +- src/Common/StringSearcher.h | 2 +- src/Common/{StringUtils => }/StringUtils.cpp | 2 +- src/Common/{StringUtils => }/StringUtils.h | 10 +++++----- src/Common/StringUtils/CMakeLists.txt | 8 -------- src/Common/TLDListsHolder.cpp | 2 +- src/Common/UTF8Helpers.cpp | 2 +- src/Common/Volnitsky.h | 2 +- src/Common/ZooKeeper/CMakeLists.txt | 4 ---- src/Common/ZooKeeper/TestKeeper.cpp | 2 +- src/Common/ZooKeeper/ZooKeeper.cpp | 2 +- src/Common/ZooKeeper/ZooKeeperArgs.cpp | 2 +- src/Common/ZooKeeper/examples/CMakeLists.txt | 2 +- .../examples/zkutil_test_commands_new_lib.cpp | 2 +- src/Common/escapeForFileName.cpp | 2 +- src/Common/format.h | 2 +- src/Common/formatIPv6.cpp | 2 +- src/Common/formatIPv6.h | 2 +- src/Common/getMappedArea.cpp | 2 +- src/Common/getMultipleKeysFromConfig.cpp | 2 +- src/Coordination/FourLetterCommand.cpp | 2 +- src/Coordination/KeeperStorage.cpp | 2 +- src/DataTypes/DataTypeFactory.cpp | 2 +- src/DataTypes/DataTypeMap.cpp | 2 +- src/DataTypes/DataTypeTuple.cpp | 2 +- src/DataTypes/NestedUtils.cpp | 2 +- src/DataTypes/Serializations/SerializationMap.cpp | 2 +- src/Dictionaries/CMakeLists.txt | 1 - src/Dictionaries/DictionaryStructure.cpp | 2 +- src/Dictionaries/FileDictionarySource.cpp | 2 +- src/Disks/IVolume.cpp | 2 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 +- src/Disks/VolumeJBOD.cpp | 2 +- src/Formats/CapnProtoSchema.cpp | 2 +- src/Formats/StructureToCapnProtoSchema.cpp | 2 +- src/Formats/StructureToProtobufSchema.cpp | 2 +- src/Functions/ExtractString.h | 2 +- 
src/Functions/FunctionTokens.h | 2 +- src/Functions/FunctionsProgrammingClassification.cpp | 2 +- src/Functions/FunctionsTonalityClassification.cpp | 2 +- .../JSONPath/Parsers/ParserJSONPathMemberAccess.cpp | 2 +- src/Functions/URL/domain.h | 2 +- src/Functions/URL/netloc.cpp | 2 +- src/Functions/URL/port.cpp | 2 +- src/Functions/URL/protocol.h | 2 +- .../ExternalUserDefinedExecutableFunctionsLoader.cpp | 2 +- .../UserDefined/UserDefinedSQLObjectsDiskStorage.cpp | 2 +- src/Functions/alphaTokens.cpp | 2 +- src/Functions/arrayStringConcat.cpp | 2 +- src/Functions/decodeHTMLComponent.cpp | 2 +- src/Functions/decodeXMLComponent.cpp | 2 +- src/Functions/extractAll.cpp | 2 +- src/Functions/extractTextFromHTML.cpp | 2 +- src/Functions/initcap.cpp | 2 +- src/Functions/soundex.cpp | 2 +- src/Functions/splitByChar.cpp | 2 +- src/Functions/splitByNonAlpha.cpp | 2 +- src/Functions/splitByRegexp.cpp | 2 +- src/Functions/splitByString.cpp | 2 +- src/Functions/splitByWhitespace.cpp | 2 +- src/Functions/translate.cpp | 2 +- src/IO/HTTPChunkedReadBuffer.cpp | 2 +- src/IO/ReadHelpers.cpp | 2 +- src/IO/ReadHelpers.h | 2 +- src/IO/S3Common.cpp | 2 +- src/IO/WriteHelpers.h | 2 +- src/IO/parseDateTimeBestEffort.cpp | 2 +- src/IO/readFloatText.h | 2 +- .../Access/InterpreterShowAccessEntitiesQuery.cpp | 2 +- .../Access/InterpreterShowCreateAccessEntityQuery.cpp | 2 +- src/Interpreters/Cluster.cpp | 2 +- src/Interpreters/ClusterDiscovery.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/ExternalLoader.cpp | 2 +- src/Interpreters/ExternalLoaderXMLConfigRepository.cpp | 2 +- src/Interpreters/ITokenExtractor.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Interpreters/InterserverCredentials.cpp | 2 +- src/Interpreters/JoinToSubqueryTransformVisitor.cpp | 2 +- src/Interpreters/QueryNormalizer.cpp | 2 +- src/Interpreters/TableJoin.cpp | 2 +- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 2 +- src/Interpreters/misc.h | 2 +- src/Parsers/CMakeLists.txt | 2 +- src/Parsers/ExpressionElementParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.cpp | 2 +- .../Kusto/KustoFunctions/KQLAggregationFunctions.cpp | 2 +- src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp | 2 +- src/Parsers/Kusto/parseKQLQuery.cpp | 2 +- src/Parsers/Lexer.cpp | 2 +- src/Parsers/ParserDataType.cpp | 2 +- src/Parsers/formatSettingName.cpp | 2 +- src/Parsers/obfuscateQueries.cpp | 2 +- src/Parsers/parseQuery.cpp | 2 +- src/Parsers/queryNormalization.cpp | 2 +- .../Merges/Algorithms/SummingSortedAlgorithm.cpp | 2 +- src/Server/HTTPHandler.cpp | 2 +- src/Server/HTTPHandlerFactory.h | 2 +- src/Server/HTTPHandlerRequestFilter.h | 2 +- src/Storages/CompressionCodecSelector.h | 2 +- .../DistributedAsyncInsertDirectoryQueue.cpp | 2 +- src/Storages/IStorage.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/ReplicatedMergeTreePartHeader.cpp | 2 +- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- src/Storages/RocksDB/StorageSystemRocksDB.cpp | 2 +- src/Storages/StorageFactory.cpp | 2 +- src/Storages/StorageInMemoryMetadata.cpp | 2 +- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/StorageSet.cpp | 2 +- src/Storages/System/CMakeLists.txt | 1 - src/Storages/System/StorageSystemDashboards.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/examples/CMakeLists.txt | 2 +- 144 files changed, 139 insertions(+), 165 deletions(-) rename src/Common/{StringUtils => 
}/StringUtils.cpp (95%) rename src/Common/{StringUtils => }/StringUtils.h (96%) delete mode 100644 src/Common/StringUtils/CMakeLists.txt diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 0d91de2dad8..4640882f2be 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -162,7 +162,7 @@ if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID) set (HARMFUL_LIB harmful) endif () -target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${HARMFUL_LIB}) +target_link_libraries (clickhouse PRIVATE clickhouse_common_io ${HARMFUL_LIB}) target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) if (ENABLE_CLICKHOUSE_KEEPER) diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt index e160355ef7b..f8ef8ccaf65 100644 --- a/programs/client/CMakeLists.txt +++ b/programs/client/CMakeLists.txt @@ -10,7 +10,6 @@ set (CLICKHOUSE_CLIENT_LINK clickhouse_common_io clickhouse_functions clickhouse_parsers - string_utils ) if (TARGET ch_rust::skim) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index d4b975ce1e8..1b91e7ceaf3 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index eaf85df67b1..5430c4b0a42 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp index be2686d936b..f2f91930ac0 100644 --- a/programs/keeper/clickhouse-keeper.cpp +++ b/programs/keeper/clickhouse-keeper.cpp @@ -1,4 +1,4 @@ -#include +#include #include "config_tools.h" diff --git a/programs/library-bridge/ExternalDictionaryLibraryUtils.h b/programs/library-bridge/ExternalDictionaryLibraryUtils.h index e6bf8f2a4c3..2eb44022742 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryUtils.h +++ b/programs/library-bridge/ExternalDictionaryLibraryUtils.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/programs/main.cpp b/programs/main.cpp index 4bb73399719..bc8476e4ce4 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -15,7 +15,7 @@ #include "config_tools.h" -#include +#include #include #include diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 8035f053b41..688ae1a1143 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/odbc-bridge/validateODBCConnectionString.cpp b/programs/odbc-bridge/validateODBCConnectionString.cpp index 6c6e11162b4..72c3c9bddca 100644 --- a/programs/odbc-bridge/validateODBCConnectionString.cpp +++ b/programs/odbc-bridge/validateODBCConnectionString.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "validateODBCConnectionString.h" diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 81440b03690..76d201cc924 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -13,7 +13,6 @@ set (CLICKHOUSE_SERVER_LINK clickhouse_parsers clickhouse_storages_system clickhouse_table_functions - string_utils ${LINK_RESOURCE_LIB} diff --git a/src/Access/User.cpp b/src/Access/User.cpp index ef5cf722113..6a296706baf 100644 --- 
a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 908ff780c62..1f9a977bab6 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp b/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp index a42e4177ac5..428f7168826 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp +++ b/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.cpp @@ -1,6 +1,6 @@ #include "AggregateFunctionCombinatorFactory.h" -#include +#include namespace DB { diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 8c0989b8202..8f32c918c61 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index 6da2b7c06da..c25d7bd2fed 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index da17bc1f41f..4e8946facda 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -307,7 +307,6 @@ endif() target_link_libraries (clickhouse_common_io PRIVATE - string_utils widechar_width ${LINK_LIBRARIES_ONLY_ON_X86_64} PUBLIC @@ -320,7 +319,6 @@ target_link_libraries (clickhouse_common_io target_link_libraries (clickhouse_compression PUBLIC - string_utils pcg_random clickhouse_parsers PRIVATE @@ -410,7 +408,6 @@ dbms_target_link_libraries ( clickhouse_parsers ch_contrib::lz4 Poco::JSON - string_utils PUBLIC boost::system clickhouse_common_io @@ -645,7 +642,6 @@ if (ENABLE_TESTS) dbms clickhouse_common_config clickhouse_common_zookeeper - string_utils hilite_comparator) if (TARGET ch_contrib::simdjson) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 67aba2256e8..085d95370ba 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 6e626c22527..19cd8cc4ee5 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index b83c8431f0a..d4802c28f53 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -1,5 +1,3 @@ -add_subdirectory(StringUtils) - if (ENABLE_BENCHMARKS) add_subdirectory(benchmarks) endif() diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index 009e2456322..09095ef5acc 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -13,8 +13,6 @@ target_link_libraries(clickhouse_common_config clickhouse_common_zookeeper common Poco::XML - PRIVATE - string_utils ) add_library(clickhouse_common_config_no_zookeeper_log ${SRCS}) @@ -23,8 +21,6 @@ target_link_libraries(clickhouse_common_config_no_zookeeper_log clickhouse_common_zookeeper_no_log common Poco::XML - PRIVATE - string_utils ) if (TARGET 
ch_contrib::yaml_cpp) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 7930ef20153..c9832e8efd5 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h index 64207dc5423..d6c32c225bf 100644 --- a/src/Common/FrequencyHolder.h +++ b/src/Common/FrequencyHolder.h @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Common/HTTPHeaderFilter.cpp b/src/Common/HTTPHeaderFilter.cpp index 9ad8dd6fccf..fd02fe1ecef 100644 --- a/src/Common/HTTPHeaderFilter.cpp +++ b/src/Common/HTTPHeaderFilter.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index d15b4d98615..1a6dc1090ee 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Common/ProxyListConfigurationResolver.cpp b/src/Common/ProxyListConfigurationResolver.cpp index c9b8923929a..c527c89ea6b 100644 --- a/src/Common/ProxyListConfigurationResolver.cpp +++ b/src/Common/ProxyListConfigurationResolver.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include diff --git a/src/Common/RemoteHostFilter.cpp b/src/Common/RemoteHostFilter.cpp index 815be8902e6..fe7bf878596 100644 --- a/src/Common/RemoteHostFilter.cpp +++ b/src/Common/RemoteHostFilter.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp index b568b9245ba..01aa7df48d3 100644 --- a/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp +++ b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/src/Common/SensitiveDataMasker.cpp b/src/Common/SensitiveDataMasker.cpp index 8c29b899841..a9f61a1c786 100644 --- a/src/Common/SensitiveDataMasker.cpp +++ b/src/Common/SensitiveDataMasker.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #ifndef NDEBUG diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index b3065354f65..d7e706fcd80 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Common/StringUtils/StringUtils.cpp b/src/Common/StringUtils.cpp similarity index 95% rename from src/Common/StringUtils/StringUtils.cpp rename to src/Common/StringUtils.cpp index 8a0b25dbdad..f61a39851e2 100644 --- a/src/Common/StringUtils/StringUtils.cpp +++ b/src/Common/StringUtils.cpp @@ -1,7 +1,7 @@ #include "StringUtils.h" -namespace detail +namespace impl { bool startsWith(const std::string & s, const char * prefix, size_t prefix_size) diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils.h similarity index 96% rename from src/Common/StringUtils/StringUtils.h rename to src/Common/StringUtils.h index 4958ecc9476..051e4338714 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils.h @@ -8,7 +8,7 @@ #include -namespace detail +namespace impl { bool startsWith(const std::string & s, const 
char * prefix, size_t prefix_size); bool endsWith(const std::string & s, const char * suffix, size_t suffix_size); @@ -17,12 +17,12 @@ namespace detail inline bool startsWith(const std::string & s, const std::string & prefix) { - return detail::startsWith(s, prefix.data(), prefix.size()); + return impl::startsWith(s, prefix.data(), prefix.size()); } inline bool endsWith(const std::string & s, const std::string & suffix) { - return detail::endsWith(s, suffix.data(), suffix.size()); + return impl::endsWith(s, suffix.data(), suffix.size()); } @@ -30,12 +30,12 @@ inline bool endsWith(const std::string & s, const std::string & suffix) /// string that is known at compile time. inline bool startsWith(const std::string & s, const char * prefix) { - return detail::startsWith(s, prefix, strlen(prefix)); + return impl::startsWith(s, prefix, strlen(prefix)); } inline bool endsWith(const std::string & s, const char * suffix) { - return detail::endsWith(s, suffix, strlen(suffix)); + return impl::endsWith(s, suffix, strlen(suffix)); } /// Given an integer, return the adequate suffix for diff --git a/src/Common/StringUtils/CMakeLists.txt b/src/Common/StringUtils/CMakeLists.txt deleted file mode 100644 index 57c196d335c..00000000000 --- a/src/Common/StringUtils/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# These files are located in separate library, because they are used by separate products -# in places when no dependency on whole "dbms" library is possible. - -include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") - -add_headers_and_sources(clickhouse_common_stringutils .) - -add_library(string_utils ${clickhouse_common_stringutils_headers} ${clickhouse_common_stringutils_sources}) diff --git a/src/Common/TLDListsHolder.cpp b/src/Common/TLDListsHolder.cpp index c3991b86983..413d221090e 100644 --- a/src/Common/TLDListsHolder.cpp +++ b/src/Common/TLDListsHolder.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index b8f5c000e75..3c3cf61bbfc 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 6513bdb8bc3..3a148983790 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index aa06375bd6a..8b6c420e565 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -12,8 +12,6 @@ target_link_libraries (clickhouse_common_zookeeper clickhouse_common_io clickhouse_compression common - PRIVATE - string_utils ) # for examples -- no logging (to avoid extra dependencies) @@ -23,8 +21,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log clickhouse_common_io clickhouse_compression common - PRIVATE - string_utils ) if (ENABLE_EXAMPLES) add_subdirectory(examples) diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index d02ad4523ad..51ad2e7c830 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -1,7 +1,7 @@ #include "Common/ZooKeeper/IKeeper.h" #include #include -#include +#include #include #include diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index c62c5d0c143..be490d0bfc1 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ 
b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -19,7 +19,7 @@ #include #include "Common/ZooKeeper/IKeeper.h" #include -#include +#include #include #include diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.cpp b/src/Common/ZooKeeper/ZooKeeperArgs.cpp index 40bd9d79a03..a581b6a7f38 100644 --- a/src/Common/ZooKeeper/ZooKeeperArgs.cpp +++ b/src/Common/ZooKeeper/ZooKeeperArgs.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index a99fbe55dd8..16300115362 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -2,7 +2,7 @@ clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp) target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper_no_log) clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp) -target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression string_utils) +target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression) clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp) target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log) diff --git a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp index 414006d48a4..25d66b94b46 100644 --- a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp +++ b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/escapeForFileName.cpp b/src/Common/escapeForFileName.cpp index a1f9bff28d0..2fe23245f49 100644 --- a/src/Common/escapeForFileName.cpp +++ b/src/Common/escapeForFileName.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB diff --git a/src/Common/format.h b/src/Common/format.h index 27018f64064..3dbb88b4089 100644 --- a/src/Common/format.h +++ b/src/Common/format.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Common/formatIPv6.cpp b/src/Common/formatIPv6.cpp index 86e33beb7c3..341b3715d30 100644 --- a/src/Common/formatIPv6.cpp +++ b/src/Common/formatIPv6.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index 3451eda6b3c..bb83e0381ef 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include constexpr size_t IPV4_BINARY_LENGTH = 4; constexpr size_t IPV6_BINARY_LENGTH = 16; diff --git a/src/Common/getMappedArea.cpp b/src/Common/getMappedArea.cpp index 4f40c604c6a..79191d68fb9 100644 --- a/src/Common/getMappedArea.cpp +++ b/src/Common/getMappedArea.cpp @@ -3,7 +3,7 @@ #if defined(OS_LINUX) -#include +#include #include #include #include diff --git a/src/Common/getMultipleKeysFromConfig.cpp b/src/Common/getMultipleKeysFromConfig.cpp index 7cf49fcc34d..6d6589a45a3 100644 --- a/src/Common/getMultipleKeysFromConfig.cpp +++ b/src/Common/getMultipleKeysFromConfig.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 25254e10441..28902bc8591 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ 
b/src/Coordination/FourLetterCommand.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include "Coordination/KeeperFeatureFlags.h" #include diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index df5c2e9e0c8..9bcd0608bf7 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 844384f3c95..427af090b91 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index d9f70e1659d..4d7ab63f966 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 5bbd79160d4..ebee096613d 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 6a56f885503..650559d21d9 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include "Columns/IColumn.h" #include diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 7b6f87baf2e..49bc89687f1 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/CMakeLists.txt b/src/Dictionaries/CMakeLists.txt index 569acd9231a..783835356e6 100644 --- a/src/Dictionaries/CMakeLists.txt +++ b/src/Dictionaries/CMakeLists.txt @@ -39,7 +39,6 @@ target_link_libraries(clickhouse_dictionaries Poco::Data Poco::MongoDB Poco::Redis - string_utils ) target_link_libraries(clickhouse_dictionaries PUBLIC ch_contrib::abseil_swiss_tables) diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 0b6bdea60a3..c2f2f4a8532 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index 16a4ecaee75..fde46fb27f0 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -1,6 +1,6 @@ #include "FileDictionarySource.h" #include -#include +#include #include #include #include diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index d763c55c4aa..e6be0f36193 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -1,6 +1,6 @@ #include "IVolume.h" -#include +#include #include #include diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 043e5b8ef8c..adbdd9d13aa 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git 
a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index c3114eb0b6f..35913613326 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -4,7 +4,7 @@ #if USE_AWS_S3 -#include +#include #include #include #include diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index a0c71583a22..d0e9d32ff5e 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -1,6 +1,6 @@ #include "VolumeJBOD.h" -#include +#include #include #include #include diff --git a/src/Formats/CapnProtoSchema.cpp b/src/Formats/CapnProtoSchema.cpp index 559047a6f8d..6076dae4157 100644 --- a/src/Formats/CapnProtoSchema.cpp +++ b/src/Formats/CapnProtoSchema.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Formats/StructureToCapnProtoSchema.cpp b/src/Formats/StructureToCapnProtoSchema.cpp index 99298fadee1..cd45b19d3c0 100644 --- a/src/Formats/StructureToCapnProtoSchema.cpp +++ b/src/Formats/StructureToCapnProtoSchema.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Formats/StructureToProtobufSchema.cpp b/src/Formats/StructureToProtobufSchema.cpp index 178c0ae3cc2..9fd02969adb 100644 --- a/src/Formats/StructureToProtobufSchema.cpp +++ b/src/Formats/StructureToProtobufSchema.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h index aa0e1b04835..6beb8be830a 100644 --- a/src/Functions/ExtractString.h +++ b/src/Functions/ExtractString.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index c80152bc71d..d6cf6a24983 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index a93e1d9a87d..c01e47ad0d7 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -2,7 +2,7 @@ #if USE_NLP -#include +#include #include #include diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index 3de38d99c88..a9321819a26 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -2,7 +2,7 @@ #if USE_NLP -#include +#include #include #include diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp index 6d5e37623e9..fad822379d4 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 87f5aeffda7..936fb9d5f00 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -3,7 +3,7 @@ #include "protocol.h" #include #include -#include +#include namespace DB { diff --git a/src/Functions/URL/netloc.cpp b/src/Functions/URL/netloc.cpp index abfa7ec26fd..d1ca4fa1614 100644 --- a/src/Functions/URL/netloc.cpp +++ b/src/Functions/URL/netloc.cpp @@ -1,4 +1,4 @@ -#include +#include 
#include #include #include diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index 942f6b702fd..c8f50f10a56 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Functions/URL/protocol.h b/src/Functions/URL/protocol.h index c1d83192835..5e90f538ff1 100644 --- a/src/Functions/URL/protocol.h +++ b/src/Functions/URL/protocol.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp index a4f17aa1201..2c031158c48 100644 --- a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -1,7 +1,7 @@ #include "ExternalUserDefinedExecutableFunctionsLoader.h" #include -#include +#include #include diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index d874612ad04..b406cc8d317 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -3,7 +3,7 @@ #include "Functions/UserDefined/UserDefinedSQLFunctionFactory.h" #include "Functions/UserDefined/UserDefinedSQLObjectType.h" -#include +#include #include #include #include diff --git a/src/Functions/alphaTokens.cpp b/src/Functions/alphaTokens.cpp index 35f434e7498..f4d77f1d654 100644 --- a/src/Functions/alphaTokens.cpp +++ b/src/Functions/alphaTokens.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Functions/arrayStringConcat.cpp b/src/Functions/arrayStringConcat.cpp index b787feeeca1..421408c01f2 100644 --- a/src/Functions/arrayStringConcat.cpp +++ b/src/Functions/arrayStringConcat.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp index 4db3c43f946..00a601b77a6 100644 --- a/src/Functions/decodeHTMLComponent.cpp +++ b/src/Functions/decodeHTMLComponent.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Functions/decodeXMLComponent.cpp b/src/Functions/decodeXMLComponent.cpp index a25e67e0e37..cbbe46fcb8c 100644 --- a/src/Functions/decodeXMLComponent.cpp +++ b/src/Functions/decodeXMLComponent.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/extractAll.cpp b/src/Functions/extractAll.cpp index f0c18bf79b9..5801a7b8f4f 100644 --- a/src/Functions/extractAll.cpp +++ b/src/Functions/extractAll.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp index 4eefeaa9f86..d9aa004b279 100644 --- a/src/Functions/extractTextFromHTML.cpp +++ b/src/Functions/extractTextFromHTML.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include /** A function to extract text from HTML or XHTML. 
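
Every hunk in this patch is the same mechanical fix: the helpers moved from Common/StringUtils/StringUtils.h up to Common/StringUtils.h, so only include paths change. For reference, a minimal usage sketch of the header being moved (the function name and literals are illustrative):

#include <Common/StringUtils.h>

/// The std::string overloads delegate to impl::startsWith / impl::endsWith;
/// the const char * overloads call strlen on the compile-time literal.
bool looksLikeSystemLogTable(const std::string & name)
{
    return startsWith(name, "system.") && endsWith(name, "_log");
}
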
diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp index 5460ee06792..6b2958227bc 100644 --- a/src/Functions/initcap.cpp +++ b/src/Functions/initcap.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Functions/soundex.cpp b/src/Functions/soundex.cpp index 0cddfc90f7c..77ddb14a6ec 100644 --- a/src/Functions/soundex.cpp +++ b/src/Functions/soundex.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Functions/splitByChar.cpp b/src/Functions/splitByChar.cpp index d3d5dc9fe4a..52db5623b89 100644 --- a/src/Functions/splitByChar.cpp +++ b/src/Functions/splitByChar.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/splitByNonAlpha.cpp b/src/Functions/splitByNonAlpha.cpp index 4486a33aa88..17ff6cfb0a8 100644 --- a/src/Functions/splitByNonAlpha.cpp +++ b/src/Functions/splitByNonAlpha.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 430089f14ee..32afb813a04 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/splitByString.cpp b/src/Functions/splitByString.cpp index 5c97f9841e7..e9b70a58eab 100644 --- a/src/Functions/splitByString.cpp +++ b/src/Functions/splitByString.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/splitByWhitespace.cpp b/src/Functions/splitByWhitespace.cpp index cf21a218b15..5bf27f64c17 100644 --- a/src/Functions/splitByWhitespace.cpp +++ b/src/Functions/splitByWhitespace.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index c7173909029..2df08a5664e 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/IO/HTTPChunkedReadBuffer.cpp b/src/IO/HTTPChunkedReadBuffer.cpp index 41788fa8ce7..b5ac6a9b728 100644 --- a/src/IO/HTTPChunkedReadBuffer.cpp +++ b/src/IO/HTTPChunkedReadBuffer.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 8c83eac5cff..b428b1c7d8a 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 5cf7d3e5b66..63bfae513e7 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 56e3e0df21b..4583b2bb0ac 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include "config.h" diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index a30e2feb439..d4b2d8ea0dc 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 70401fdf72d..e046e837689 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git 
a/src/IO/readFloatText.h b/src/IO/readFloatText.h index d1652784cc2..3a21d7201a9 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunneeded-internal-declaration" diff --git a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp index 76979ed86c8..71fc1047cfa 100644 --- a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 1147d74c146..96d8e55a74c 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index ec6283df649..59c98491c14 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp index d432488964d..6f9c375c2f5 100644 --- a/src/Interpreters/ClusterDiscovery.cpp +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 395218f834f..d80d5cd5b93 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index a636e59fa1a..96405f35f3f 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp b/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp index a15f918f457..e404797501d 100644 --- a/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp +++ b/src/Interpreters/ExternalLoaderXMLConfigRepository.cpp @@ -2,7 +2,7 @@ #include -#include +#include #include #include #include diff --git a/src/Interpreters/ITokenExtractor.cpp b/src/Interpreters/ITokenExtractor.cpp index 9c4027dfa0a..1c5d0d4b6d4 100644 --- a/src/Interpreters/ITokenExtractor.cpp +++ b/src/Interpreters/ITokenExtractor.cpp @@ -2,7 +2,7 @@ #include -#include +#include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 519cbde588f..4fdd804452d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -6,7 +6,7 @@ #include #include "Common/Exception.h" -#include +#include #include #include #include diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index c344732a262..1327a2ef388 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -1,6 +1,6 @@ 
#include #include -#include +#include namespace DB { diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 5cda4c982b4..6a3a181ed26 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index f47635a3c3f..a8639906aad 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 1ee8ca14b2f..6191eb73fd4 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 03df7283992..c21c4d34fa8 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index c009808de3f..b77fc5aee1e 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace DB diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 3bc1b3a981f..d5653da7b3a 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -7,7 +7,7 @@ add_headers_and_sources(clickhouse_parsers ./Kusto) add_headers_and_sources(clickhouse_parsers ./PRQL) add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) -target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) +target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access) if (TARGET ch_rust::prql) target_link_libraries(clickhouse_parsers PRIVATE ch_rust::prql) endif () diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 59b586d46a0..416f696323c 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include "Parsers/CommonParsers.h" diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index ee9e199b9b8..7cdfaf988a3 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 16436d38d32..0eb83b8b5ac 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp index c4f84d576cb..19625f6624d 100644 --- a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp +++ 
b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Parsers/Kusto/parseKQLQuery.cpp b/src/Parsers/Kusto/parseKQLQuery.cpp index 34a009873f8..34076168480 100644 --- a/src/Parsers/Kusto/parseKQLQuery.cpp +++ b/src/Parsers/Kusto/parseKQLQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 9ac6e623803..34855a7ce20 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index fdd712f2e68..05c9a2cd306 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Parsers/formatSettingName.cpp b/src/Parsers/formatSettingName.cpp index efbfffddd7b..59973379167 100644 --- a/src/Parsers/formatSettingName.cpp +++ b/src/Parsers/formatSettingName.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index 2ed551851e8..074b6797517 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 66bd76687aa..41c51267496 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Parsers/queryNormalization.cpp b/src/Parsers/queryNormalization.cpp index 4a9dd8ceb98..4890ad6952d 100644 --- a/src/Parsers/queryNormalization.cpp +++ b/src/Parsers/queryNormalization.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 79b5dae2d6e..df27520856e 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index ce80d0c22c6..a677c537622 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index ac18c36e6c9..b4c32366463 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 15e64cf7f48..de1920bd535 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/CompressionCodecSelector.h b/src/Storages/CompressionCodecSelector.h index ad6e943e821..e03d06bacdb 100644 --- a/src/Storages/CompressionCodecSelector.h +++ b/src/Storages/CompressionCodecSelector.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include 
#include #include #include diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 14866c25365..d471c67553d 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index b532abc9074..920155bf689 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 463ca07ec57..483e949a26f 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 167160db317..527dac01b71 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp index 24d907dbad6..9aadc3c3ca7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d6c36d12bf5..9a368bd44f5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index 4406a7c3fd4..5105b190fd9 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 307a0aa001a..9d12a1569d8 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 1ac739f03fd..306ae782d24 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 25c48de94e1..54b2d5ef6fb 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index df5bbdf9f78..378b81c6d18 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8,7 +8,7 @@ #include #include #include -#include 
+#include #include #include #include diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 54218351cf1..205a90423bf 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index c3a2e726365..899c3d5cf40 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -47,7 +47,6 @@ add_library(clickhouse_storages_system ${storages_system_sources}) target_link_libraries(clickhouse_storages_system PRIVATE dbms common - string_utils clickhouse_common_zookeeper clickhouse_parsers Poco::JSON diff --git a/src/Storages/System/StorageSystemDashboards.cpp b/src/Storages/System/StorageSystemDashboards.cpp index 23d8fcfc481..9682fbc74a1 100644 --- a/src/Storages/System/StorageSystemDashboards.cpp +++ b/src/Storages/System/StorageSystemDashboards.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index d428d6bd6d0..43398517072 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index cddfc9404d4..b4786b7313b 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -5,4 +5,4 @@ clickhouse_add_executable (merge_selector2 merge_selector2.cpp) target_link_libraries (merge_selector2 PRIVATE dbms) clickhouse_add_executable (get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) -target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper string_utils) +target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) From 17ce44907677dbfb6aa2ebc27593ffe02c239149 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 09:38:17 +0000 Subject: [PATCH 389/651] Move isAllASCII from UTF8Helpers to StringUtils --- src/Common/StringUtils.cpp | 72 ++++++++++++++++++++++++++++- src/Common/StringUtils.h | 5 ++ src/Common/UTF8Helpers.cpp | 74 +----------------------------- src/Common/UTF8Helpers.h | 4 -- src/Functions/LowerUpperUTF8Impl.h | 3 +- src/Functions/padString.cpp | 5 +- src/Functions/reverseUTF8.cpp | 3 +- src/Functions/substring.cpp | 5 +- src/Functions/substringIndex.cpp | 13 +++--- 9 files changed, 94 insertions(+), 90 deletions(-) diff --git a/src/Common/StringUtils.cpp b/src/Common/StringUtils.cpp index f61a39851e2..18577e64c01 100644 --- a/src/Common/StringUtils.cpp +++ b/src/Common/StringUtils.cpp @@ -1,4 +1,10 @@ -#include "StringUtils.h" +#include + +#include + +#if USE_MULTITARGET_CODE +#include +#endif namespace impl @@ -15,3 +21,67 @@ bool endsWith(const std::string & s, const char * suffix, size_t suffix_size) } } + +DECLARE_DEFAULT_CODE( +bool isAllASCII(const UInt8 * data, size_t size) +{ + UInt8 mask = 0; + for (size_t i = 0; i < size; ++i) + mask |= data[i]; + + return !(mask & 0x80); +}) + +DECLARE_SSE42_SPECIFIC_CODE( +/// Copy from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h +bool isAllASCII(const UInt8 * data, size_t size) +{ + __m128i masks = _mm_setzero_si128(); + + 
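+    /// Accumulate a bitwise OR of all bytes: a byte is non-ASCII exactly when
+    /// its high bit (0x80) is set, so the _mm_movemask_epi8 of the accumulated
+    /// value below is zero iff every byte seen so far was ASCII.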
size_t i = 0; + for (; i + 16 <= size; i += 16) + { + __m128i bytes = _mm_loadu_si128(reinterpret_cast(data + i)); + masks = _mm_or_si128(masks, bytes); + } + int mask = _mm_movemask_epi8(masks); + + UInt8 tail_mask = 0; + for (; i < size; i++) + tail_mask |= data[i]; + + mask |= (tail_mask & 0x80); + return !mask; +}) + +DECLARE_AVX2_SPECIFIC_CODE( +bool isAllASCII(const UInt8 * data, size_t size) +{ + __m256i masks = _mm256_setzero_si256(); + + size_t i = 0; + for (; i + 32 <= size; i += 32) + { + __m256i bytes = _mm256_loadu_si256(reinterpret_cast(data + i)); + masks = _mm256_or_si256(masks, bytes); + } + int mask = _mm256_movemask_epi8(masks); + + UInt8 tail_mask = 0; + for (; i < size; i++) + tail_mask |= data[i]; + + mask |= (tail_mask & 0x80); + return !mask; +}) + +bool isAllASCII(const UInt8 * data, size_t size) +{ +#if USE_MULTITARGET_CODE + if (isArchSupported(DB::TargetArch::AVX2)) + return TargetSpecific::AVX2::isAllASCII(data, size); + if (isArchSupported(DB::TargetArch::SSE42)) + return TargetSpecific::SSE42::isAllASCII(data, size); +#endif + return TargetSpecific::Default::isAllASCII(data, size); +} diff --git a/src/Common/StringUtils.h b/src/Common/StringUtils.h index 051e4338714..fe5fc3c058f 100644 --- a/src/Common/StringUtils.h +++ b/src/Common/StringUtils.h @@ -7,6 +7,8 @@ #include #include +#include + namespace impl { @@ -315,6 +317,9 @@ inline void trim(std::string & str, char c = ' ') trimLeft(str, c); } +/// If all characters in the string are ASCII, return true +bool isAllASCII(const UInt8 * data, size_t size); + constexpr bool containsGlobs(const std::string & str) { return str.find_first_of("*?{") != std::string::npos; diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index 3c3cf61bbfc..8c8c8e8327b 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -1,14 +1,9 @@ -#include -#include #include +#include #include #include -#if USE_MULTITARGET_CODE -#include -#endif - namespace DB { namespace UTF8 @@ -208,7 +203,6 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l } - size_t computeWidth(const UInt8 * data, size_t size, size_t prefix) noexcept { return computeWidthImpl(data, size, prefix, 0); @@ -219,71 +213,5 @@ size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, s return computeWidthImpl(data, size, prefix, limit); } - -DECLARE_DEFAULT_CODE( -bool isAllASCII(const UInt8 * data, size_t size) -{ - UInt8 mask = 0; - for (size_t i = 0; i < size; ++i) - mask |= data[i]; - - return !(mask & 0x80); -}) - -DECLARE_SSE42_SPECIFIC_CODE( -/// Copy from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h -bool isAllASCII(const UInt8 * data, size_t size) -{ - __m128i masks = _mm_setzero_si128(); - - size_t i = 0; - for (; i + 16 <= size; i += 16) - { - __m128i bytes = _mm_loadu_si128(reinterpret_cast(data + i)); - masks = _mm_or_si128(masks, bytes); - } - int mask = _mm_movemask_epi8(masks); - - UInt8 tail_mask = 0; - for (; i < size; i++) - tail_mask |= data[i]; - - mask |= (tail_mask & 0x80); - return !mask; -}) - -DECLARE_AVX2_SPECIFIC_CODE( -bool isAllASCII(const UInt8 * data, size_t size) -{ - __m256i masks = _mm256_setzero_si256(); - - size_t i = 0; - for (; i + 32 <= size; i += 32) - { - __m256i bytes = _mm256_loadu_si256(reinterpret_cast(data + i)); - masks = _mm256_or_si256(masks, bytes); - } - int mask = _mm256_movemask_epi8(masks); - - UInt8 tail_mask = 0; - for (; i < size; i++) - tail_mask |= data[i]; - - mask |= (tail_mask & 0x80); - 
return !mask; -}) - -bool isAllASCII(const UInt8* data, size_t size) -{ -#if USE_MULTITARGET_CODE - if (isArchSupported(TargetArch::AVX2)) - return TargetSpecific::AVX2::isAllASCII(data, size); - if (isArchSupported(TargetArch::SSE42)) - return TargetSpecific::SSE42::isAllASCII(data, size); -#endif - return TargetSpecific::Default::isAllASCII(data, size); -} - - } } diff --git a/src/Common/UTF8Helpers.h b/src/Common/UTF8Helpers.h index 933b62c7b63..b09d92bd731 100644 --- a/src/Common/UTF8Helpers.h +++ b/src/Common/UTF8Helpers.h @@ -136,10 +136,6 @@ size_t computeWidth(const UInt8 * data, size_t size, size_t prefix = 0) noexcept */ size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept; - -/// If all the characters in the string are ASCII, return true. -bool isAllASCII(const UInt8* data, size_t size); - } } diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index bb794a0f8ed..eebba7b9d5f 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #ifdef __SSE2__ @@ -94,7 +95,7 @@ struct LowerUpperUTF8Impl if (data.empty()) return; - bool all_ascii = UTF8::isAllASCII(data.data(), data.size()); + bool all_ascii = isAllASCII(data.data(), data.size()); if (all_ascii) { LowerUpperImpl::vector(data, offsets, res_data, res_offsets); diff --git a/src/Functions/padString.cpp b/src/Functions/padString.cpp index 0922e0ddb8a..8670c837e21 100644 --- a/src/Functions/padString.cpp +++ b/src/Functions/padString.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -237,8 +238,8 @@ namespace void executeForSource(SourceStrings && strings, const ColumnPtr & column_length, const String & pad_string, StringSink & res_sink) const { const auto & chars = strings.getElements(); - bool all_ascii = UTF8::isAllASCII(reinterpret_cast(pad_string.data()), pad_string.size()) - && UTF8::isAllASCII(chars.data(), chars.size()); + bool all_ascii = isAllASCII(reinterpret_cast(pad_string.data()), pad_string.size()) + && isAllASCII(chars.data(), chars.size()); bool is_actually_utf8 = is_utf8 && !all_ascii; if (!is_actually_utf8) diff --git a/src/Functions/reverseUTF8.cpp b/src/Functions/reverseUTF8.cpp index 4ea861919a1..1aee349fa8d 100644 --- a/src/Functions/reverseUTF8.cpp +++ b/src/Functions/reverseUTF8.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "reverse.h" @@ -27,7 +28,7 @@ struct ReverseUTF8Impl ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { - bool all_ascii = UTF8::isAllASCII(data.data(), data.size()); + bool all_ascii = isAllASCII(data.data(), data.size()); if (all_ascii) { ReverseImpl::vector(data, offsets, res_data, res_offsets); diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index 122f83d758b..f1dea7db018 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -149,7 +150,7 @@ public: { if (const ColumnString * col = checkAndGetColumn(column_string.get())) { - bool all_ascii = UTF8::isAllASCII(col->getChars().data(), col->getChars().size()); + bool all_ascii = isAllASCII(col->getChars().data(), col->getChars().size()); if (all_ascii) return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, StringSource(*col), input_rows_count); else @@ -159,7 +160,7 @@ public: if (const ColumnConst * col_const = 
checkAndGetColumnConst(column_string.get())) { StringRef str_ref = col_const->getDataAt(0); - bool all_ascii = UTF8::isAllASCII(reinterpret_cast(str_ref.data), str_ref.size); + bool all_ascii = isAllASCII(reinterpret_cast(str_ref.data), str_ref.size); if (all_ascii) return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); else diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 74474cb4b23..15a321bd5b0 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -129,8 +130,8 @@ namespace res_data.reserve(str_column->getChars().size() / 2); res_offsets.reserve(rows); - bool all_ascii = UTF8::isAllASCII(str_column->getChars().data(), str_column->getChars().size()) - && UTF8::isAllASCII(reinterpret_cast(delim.data()), delim.size()); + bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); @@ -162,8 +163,8 @@ namespace res_data.reserve(str_column->getChars().size() / 2); res_offsets.reserve(rows); - bool all_ascii = UTF8::isAllASCII(str_column->getChars().data(), str_column->getChars().size()) - && UTF8::isAllASCII(reinterpret_cast(delim.data()), delim.size()); + bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); @@ -194,8 +195,8 @@ namespace res_data.reserve(str.size() * rows / 2); res_offsets.reserve(rows); - bool all_ascii = UTF8::isAllASCII(reinterpret_cast(str.data()), str.size()) - && UTF8::isAllASCII(reinterpret_cast(delim.data()), delim.size()); + bool all_ascii = isAllASCII(reinterpret_cast(str.data()), str.size()) + && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? 
nullptr : std::make_unique(delim.data(), delim.size()); From 113bb0000510b30c0845593911baa6d72cd5fb20 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 08:34:59 +0000 Subject: [PATCH 390/651] Fix clang-tidy "-readability-redundant-inline-specifier" --- .clang-tidy | 1 - base/base/BorrowedObjectPool.h | 14 ++--- .../library-bridge/LibraryBridgeHandlers.h | 2 +- programs/server/MetricsTransmitter.h | 8 +-- .../AggregateFunctionSequenceNextNode.cpp | 2 +- .../Combinators/AggregateFunctionIf.cpp | 4 +- src/AggregateFunctions/QuantileTDigest.h | 2 +- src/AggregateFunctions/QuantileTiming.h | 2 +- src/AggregateFunctions/ThetaSketchData.h | 4 +- src/AggregateFunctions/UniqVariadicHash.h | 8 +-- src/AggregateFunctions/UniquesHashSet.h | 10 ++-- ...egateFunctionsArithmericOperationsPass.cpp | 4 +- .../Passes/ComparisonTupleEliminationPass.cpp | 2 +- .../Passes/FunctionToSubcolumnsPass.cpp | 2 +- .../Passes/NormalizeCountVariantsPass.cpp | 2 +- .../RewriteAggregateFunctionWithIfPass.cpp | 2 +- .../RewriteSumFunctionWithSumAndCountPass.cpp | 2 +- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 4 +- .../CatBoostLibraryBridgeHelper.h | 14 ++--- .../ExternalDictionaryLibraryBridgeHelper.h | 20 +++---- src/BridgeHelper/IBridgeHelper.h | 6 +- src/BridgeHelper/LibraryBridgeHelper.h | 2 +- src/BridgeHelper/XDBCBridgeHelper.h | 16 +++--- src/Common/CPUID.h | 4 +- src/Common/ColumnsHashingImpl.h | 2 +- src/Common/CombinedCardinalityEstimator.h | 6 +- src/Common/CompactArray.h | 2 +- src/Common/CounterInFile.h | 2 +- src/Common/CurrentThread.h | 4 +- src/Common/HashTable/FixedHashTable.h | 2 +- src/Common/HashTable/HashTable.h | 2 +- src/Common/HashTable/PackedHashMap.h | 2 +- src/Common/HashTable/SmallTable.h | 2 +- src/Common/HyperLogLogCounter.h | 20 +++---- src/Common/IntervalTree.h | 18 +++--- src/Common/JSONParsers/SimdJSONParser.h | 36 ++++++------ src/Common/PODArray.h | 2 +- src/Common/PoolBase.h | 2 +- src/Common/RadixSort.h | 4 +- src/Common/SpaceSaving.h | 4 +- src/Common/ThreadProfileEvents.h | 2 +- src/Common/Volnitsky.h | 18 +++--- src/Common/ZooKeeper/IKeeper.h | 6 +- src/Common/findExtreme.cpp | 4 +- src/Core/Field.h | 4 +- src/Core/Joins.h | 24 ++++---- src/Daemon/BaseDaemon.h | 2 +- src/DataTypes/DataTypeDecimalBase.h | 2 +- src/Dictionaries/CacheDictionaryStorage.h | 8 +-- src/Dictionaries/DictionaryHelpers.h | 8 +-- src/Dictionaries/Embedded/RegionsNames.h | 4 +- src/Dictionaries/ICacheDictionaryStorage.h | 16 +++--- src/Dictionaries/IPAddressDictionary.cpp | 2 +- src/Dictionaries/RegExpTreeDictionary.cpp | 4 +- src/Dictionaries/SSDCacheDictionaryStorage.h | 56 +++++++++---------- src/Disks/IO/IOUringReader.h | 4 +- src/Functions/DivisionUtils.h | 6 +- src/Functions/ExtractString.h | 6 +- src/Functions/FunctionBinaryArithmetic.h | 8 +-- src/Functions/FunctionSQLJSON.h | 20 +++---- src/Functions/FunctionsAES.h | 4 +- src/Functions/FunctionsBitToArray.cpp | 2 +- src/Functions/FunctionsCodingIP.cpp | 4 +- src/Functions/FunctionsConsistentHashing.h | 2 +- .../FunctionsLanguageClassification.cpp | 2 +- src/Functions/FunctionsLogical.cpp | 8 +-- src/Functions/FunctionsLogical.h | 42 +++++++------- .../FunctionsProgrammingClassification.cpp | 2 +- src/Functions/FunctionsRound.h | 2 +- src/Functions/FunctionsStringHash.cpp | 20 +++---- src/Functions/FunctionsStringSimilarity.cpp | 8 +-- src/Functions/FunctionsTimeWindow.h | 8 +-- .../FunctionsTonalityClassification.cpp | 2 +- src/Functions/GCDLCMImpl.h | 2 +- src/Functions/GregorianDate.cpp | 10 ++-- src/Functions/PolygonUtils.h | 
2 +- src/Functions/TransformDateTime64.h | 8 +-- src/Functions/abs.cpp | 2 +- src/Functions/array/arrayIndex.h | 16 +++--- src/Functions/array/arrayNorm.cpp | 26 ++++----- src/Functions/bitAnd.cpp | 4 +- src/Functions/bitBoolMaskAnd.cpp | 2 +- src/Functions/bitBoolMaskOr.cpp | 2 +- src/Functions/bitCount.cpp | 2 +- src/Functions/bitHammingDistance.cpp | 2 +- src/Functions/bitNot.cpp | 4 +- src/Functions/bitOr.cpp | 4 +- src/Functions/bitRotateLeft.cpp | 4 +- src/Functions/bitRotateRight.cpp | 4 +- src/Functions/bitShiftLeft.cpp | 4 +- src/Functions/bitShiftRight.cpp | 6 +- src/Functions/bitSwapLastTwo.cpp | 4 +- src/Functions/bitTest.cpp | 2 +- src/Functions/bitTestAll.cpp | 2 +- src/Functions/bitTestAny.cpp | 2 +- src/Functions/bitWrapperFunc.cpp | 2 +- src/Functions/bitXor.cpp | 4 +- src/Functions/dateName.cpp | 18 +++--- src/Functions/divide.cpp | 4 +- src/Functions/divideDecimal.cpp | 2 +- src/Functions/factorial.cpp | 2 +- src/Functions/greatCircleDistance.cpp | 10 ++-- src/Functions/greatest.cpp | 6 +- src/Functions/h3GetUnidirectionalEdge.cpp | 2 +- src/Functions/initialQueryID.cpp | 6 +- src/Functions/intDiv.cpp | 2 +- src/Functions/intDivOrZero.cpp | 2 +- src/Functions/intExp10.cpp | 2 +- src/Functions/intExp2.cpp | 4 +- src/Functions/isValidUTF8.cpp | 4 +- src/Functions/jumpConsistentHash.cpp | 2 +- src/Functions/kostikConsistentHash.cpp | 2 +- src/Functions/least.cpp | 6 +- src/Functions/minus.cpp | 6 +- src/Functions/modulo.cpp | 2 +- src/Functions/moduloOrZero.cpp | 2 +- src/Functions/multiply.cpp | 6 +- src/Functions/multiplyDecimal.cpp | 2 +- src/Functions/negate.cpp | 4 +- src/Functions/plus.cpp | 6 +- src/Functions/queryID.cpp | 6 +- src/Functions/repeat.cpp | 4 +- src/Functions/roundAge.cpp | 2 +- src/Functions/roundDuration.cpp | 2 +- src/Functions/roundToExp2.cpp | 2 +- src/Functions/sign.cpp | 2 +- src/Functions/space.cpp | 2 +- src/Functions/tokenExtractors.cpp | 2 +- src/IO/BufferBase.h | 24 ++++---- src/IO/HTTPHeaderEntries.h | 2 +- src/IO/HadoopSnappyReadBuffer.h | 4 +- src/IO/IReadableWriteBuffer.h | 2 +- src/IO/PeekableReadBuffer.h | 6 +- src/IO/ReadBuffer.h | 2 +- src/IO/S3/Requests.h | 2 +- src/IO/WriteBuffer.h | 6 +- src/IO/ZstdDeflatingAppendableWriteBuffer.h | 2 +- src/Interpreters/DDLTask.h | 8 +-- src/Interpreters/DatabaseCatalog.h | 2 +- src/Interpreters/JIT/CHJIT.cpp | 14 ++--- src/Interpreters/JIT/CHJIT.h | 2 +- src/Interpreters/JIT/CompileDAG.h | 16 +++--- src/Interpreters/JoinUtils.h | 2 +- .../examples/hash_map_string_3.cpp | 2 +- .../Impl/CustomSeparatedRowInputFormat.h | 2 +- .../Formats/Impl/TemplateRowInputFormat.h | 2 +- src/Processors/Port.h | 6 +- src/Server/HTTPHandler.h | 6 +- src/Storages/Cache/ExternalDataSourceCache.h | 2 +- src/Storages/Cache/RemoteCacheController.h | 20 +++---- src/Storages/Hive/HiveFile.h | 4 +- src/Storages/Kafka/KafkaConsumer.h | 6 +- .../MergeTree/BackgroundProcessList.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 14 ++--- .../MergeTree/MergeTreeBlockReadUtils.h | 8 +-- .../MergeTree/MergeTreeIndexGranularityInfo.h | 4 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- src/Storages/UVLoop.h | 4 +- src/TableFunctions/ITableFunction.h | 2 +- 159 files changed, 490 insertions(+), 491 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index e2f318562ec..66417c41c46 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -129,7 +129,6 @@ Checks: [ '-readability-avoid-nested-conditional-operator', '-modernize-use-designated-initializers', '-performance-enum-size', - '-readability-redundant-inline-specifier', 
'-readability-redundant-member-init', '-bugprone-crtp-constructor-accessibility', '-bugprone-suspicious-stringview-data-usage', diff --git a/base/base/BorrowedObjectPool.h b/base/base/BorrowedObjectPool.h index 05a23d5835e..f5ef28582b2 100644 --- a/base/base/BorrowedObjectPool.h +++ b/base/base/BorrowedObjectPool.h @@ -86,7 +86,7 @@ public: } /// Return object into pool. Client must return same object that was borrowed. - inline void returnObject(T && object_to_return) + void returnObject(T && object_to_return) { { std::lock_guard lock(objects_mutex); @@ -99,20 +99,20 @@ public: } /// Max pool size - inline size_t maxSize() const + size_t maxSize() const { return max_size; } /// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full. - inline size_t allocatedObjectsSize() const + size_t allocatedObjectsSize() const { std::lock_guard lock(objects_mutex); return allocated_objects_size; } /// Returns allocatedObjectsSize == maxSize - inline bool isFull() const + bool isFull() const { std::lock_guard lock(objects_mutex); return allocated_objects_size == max_size; @@ -120,7 +120,7 @@ public: /// Borrowed objects size. If borrowedObjectsSize == allocatedObjectsSize and pool is full. /// Then client will wait during borrowObject function call. - inline size_t borrowedObjectsSize() const + size_t borrowedObjectsSize() const { std::lock_guard lock(objects_mutex); return borrowed_objects_size; @@ -129,7 +129,7 @@ public: private: template - inline T allocateObjectForBorrowing(const std::unique_lock &, FactoryFunc && func) + T allocateObjectForBorrowing(const std::unique_lock &, FactoryFunc && func) { ++allocated_objects_size; ++borrowed_objects_size; @@ -137,7 +137,7 @@ private: return std::forward(func)(); } - inline T borrowFromObjects(const std::unique_lock &) + T borrowFromObjects(const std::unique_lock &) { T dst; detail::moveOrCopyIfThrow(std::move(objects.back()), dst); diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 1db71eb24cb..62fbf2caede 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -23,7 +23,7 @@ public: void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - static constexpr inline auto FORMAT = "RowBinary"; + static constexpr auto FORMAT = "RowBinary"; const size_t keep_alive_timeout; LoggerPtr log; diff --git a/programs/server/MetricsTransmitter.h b/programs/server/MetricsTransmitter.h index 23420117b56..24069a60071 100644 --- a/programs/server/MetricsTransmitter.h +++ b/programs/server/MetricsTransmitter.h @@ -56,10 +56,10 @@ private: std::condition_variable cond; std::optional thread; - static inline constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; - static inline constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative."; - static inline constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; - static inline constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; + static constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; + static constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative."; + static constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; + static constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; }; } diff --git 
a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp index bed10333af0..b3824720b04 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp @@ -341,7 +341,7 @@ public: value[i] = Node::read(buf, arena); } - inline std::optional getBaseIndex(Data & data) const + std::optional getBaseIndex(Data & data) const { if (data.value.size() == 0) return {}; diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp b/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp index 9b5ee79a533..3e21ffa3418 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp @@ -73,7 +73,7 @@ private: using Base = AggregateFunctionNullBase>; - inline bool singleFilter(const IColumn ** columns, size_t row_num) const + bool singleFilter(const IColumn ** columns, size_t row_num) const { const IColumn * filter_column = columns[num_arguments - 1]; @@ -261,7 +261,7 @@ public: filter_is_only_null = arguments.back()->onlyNull(); } - static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) + static bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) { return assert_cast(*columns[num_arguments - 1]).getData()[row_num]; } diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index 9d84f079daa..d5a4f6b576a 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -138,7 +138,7 @@ class QuantileTDigest compress(); } - inline bool canBeMerged(const BetterFloat & l_mean, const Value & r_mean) + bool canBeMerged(const BetterFloat & l_mean, const Value & r_mean) { return l_mean == r_mean || (!std::isinf(l_mean) && !std::isinf(r_mean)); } diff --git a/src/AggregateFunctions/QuantileTiming.h b/src/AggregateFunctions/QuantileTiming.h index 45fbf38258f..eef15828fc0 100644 --- a/src/AggregateFunctions/QuantileTiming.h +++ b/src/AggregateFunctions/QuantileTiming.h @@ -262,7 +262,7 @@ namespace detail UInt64 count_big[BIG_SIZE]; /// Get value of quantile by index in array `count_big`. - static inline UInt16 indexInBigToValue(size_t i) + static UInt16 indexInBigToValue(size_t i) { return (i * BIG_PRECISION) + SMALL_THRESHOLD + (intHash32<0>(i) % BIG_PRECISION - (BIG_PRECISION / 2)); /// A small randomization so that it is not noticeable that all the values are even. 
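Background on the check driving this patch, as a minimal sketch rather than code from this series: in C++, a function defined inside a class body is implicitly inline, and constexpr static data members are implicitly inline as well (since C++17), so spelling out the keyword in those positions changes nothing. That is what readability-redundant-inline-specifier flags, and why the edits in this patch simply delete the specifier. The Widget class here is hypothetical:

    #include <cassert>

    struct Widget
    {
        /// Implicitly inline: the definition appears inside the class body.
        int size() const { return n; }

        /// The explicit specifier adds nothing for the same reason;
        /// clang-tidy's readability-redundant-inline-specifier flags it.
        inline int doubled() const { return 2 * n; }

        /// Also redundant: constexpr static data members are implicitly inline.
        static constexpr inline auto name = "widget";

        int n = 0;
    };

    int main()
    {
        Widget w{21};
        assert(w.size() == 21 && w.doubled() == 42);
        return 0;
    }

Both forms compile to identical code; removing the keyword is purely a readability change, which is why the patch can touch 159 files without behavioral risk.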
diff --git a/src/AggregateFunctions/ThetaSketchData.h b/src/AggregateFunctions/ThetaSketchData.h index f32386d945b..99dca27673d 100644 --- a/src/AggregateFunctions/ThetaSketchData.h +++ b/src/AggregateFunctions/ThetaSketchData.h @@ -24,14 +24,14 @@ private: std::unique_ptr sk_update; std::unique_ptr sk_union; - inline datasketches::update_theta_sketch * getSkUpdate() + datasketches::update_theta_sketch * getSkUpdate() { if (!sk_update) sk_update = std::make_unique(datasketches::update_theta_sketch::builder().build()); return sk_update.get(); } - inline datasketches::theta_union * getSkUnion() + datasketches::theta_union * getSkUnion() { if (!sk_union) sk_union = std::make_unique(datasketches::theta_union::builder().build()); diff --git a/src/AggregateFunctions/UniqVariadicHash.h b/src/AggregateFunctions/UniqVariadicHash.h index 840380e7f0f..5bb245397d4 100644 --- a/src/AggregateFunctions/UniqVariadicHash.h +++ b/src/AggregateFunctions/UniqVariadicHash.h @@ -38,7 +38,7 @@ bool isAllArgumentsContiguousInMemory(const DataTypes & argument_types); template <> struct UniqVariadicHash { - static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) { UInt64 hash; @@ -65,7 +65,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) { UInt64 hash; @@ -94,7 +94,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) { const IColumn ** column = columns; const IColumn ** columns_end = column + num_args; @@ -114,7 +114,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) { const auto & tuple_columns = assert_cast(columns[0])->getColumns(); diff --git a/src/AggregateFunctions/UniquesHashSet.h b/src/AggregateFunctions/UniquesHashSet.h index d6fc2bb6634..d5241547711 100644 --- a/src/AggregateFunctions/UniquesHashSet.h +++ b/src/AggregateFunctions/UniquesHashSet.h @@ -105,14 +105,14 @@ private: } } - inline size_t buf_size() const { return 1ULL << size_degree; } /// NOLINT - inline size_t max_fill() const { return 1ULL << (size_degree - 1); } /// NOLINT - inline size_t mask() const { return buf_size() - 1; } + size_t buf_size() const { return 1ULL << size_degree; } /// NOLINT + size_t max_fill() const { return 1ULL << (size_degree - 1); } /// NOLINT + size_t mask() const { return buf_size() - 1; } - inline size_t place(HashValue x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); } + size_t place(HashValue x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); } /// The value is divided by 2 ^ skip_degree - inline bool good(HashValue hash) const { return hash == ((hash >> skip_degree) << skip_degree); } + bool good(HashValue hash) const { return hash == ((hash >> skip_degree) << skip_degree); } HashValue hash(Value key) const { return static_cast(Hash()(key)); } diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index f96ba22eb7a..9153bc4eca2 100644 --- 
a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -173,13 +173,13 @@ private: return arithmetic_function_clone; } - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) + static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) { auto function_aggregate_function = function_node.getAggregateFunction(); diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index f8233f473f8..ebefc12ae53 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -184,7 +184,7 @@ private: return result_function; } - inline QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const + QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const { return makeComparisonFunction(std::move(lhs_argument), std::move(rhs_argument), "equals"); } diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 6248f462979..15ac8d642a4 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -215,7 +215,7 @@ public: } private: - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 0d6f3fc2d87..e70e08e65f4 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -59,7 +59,7 @@ public: } } private: - static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) + static void resolveAsCountAggregateFunction(FunctionNode & function_node) { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 513dd0054d6..a82ad3dced1 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -108,7 +108,7 @@ public: } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) + static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) { auto result_type = function_node.getResultType(); diff --git 
a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 917256bf4b1..5646d26f7f6 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -110,7 +110,7 @@ private: function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } - static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) + static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) { AggregateFunctionProperties properties; const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 1a4712aa697..852cbe75c4a 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -156,7 +156,7 @@ public: } private: - static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) + static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get( @@ -165,7 +165,7 @@ private: function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } - inline QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) + QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) { auto multiply_function_node = std::make_shared("multiply"); auto & multiply_arguments_nodes = multiply_function_node->getArguments().getNodes(); diff --git a/src/BridgeHelper/CatBoostLibraryBridgeHelper.h b/src/BridgeHelper/CatBoostLibraryBridgeHelper.h index 55dfd715f00..5d5c6d01705 100644 --- a/src/BridgeHelper/CatBoostLibraryBridgeHelper.h +++ b/src/BridgeHelper/CatBoostLibraryBridgeHelper.h @@ -14,8 +14,8 @@ namespace DB class CatBoostLibraryBridgeHelper final : public LibraryBridgeHelper { public: - static constexpr inline auto PING_HANDLER = "/catboost_ping"; - static constexpr inline auto MAIN_HANDLER = "/catboost_request"; + static constexpr auto PING_HANDLER = "/catboost_ping"; + static constexpr auto MAIN_HANDLER = "/catboost_request"; explicit CatBoostLibraryBridgeHelper( ContextPtr context_, @@ -38,11 +38,11 @@ protected: bool bridgeHandShake() override; private: - static constexpr inline auto CATBOOST_LIST_METHOD = "catboost_list"; - static constexpr inline auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel"; - static constexpr inline auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels"; - static constexpr inline auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount"; - static constexpr inline auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate"; + static constexpr auto CATBOOST_LIST_METHOD = "catboost_list"; + static constexpr auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel"; + static constexpr auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels"; + static constexpr auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount"; + static constexpr auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate"; Poco::URI createRequestURI(const String & method) const; diff --git a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h 
b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h index 5632fd2a28e..63816aa63ef 100644 --- a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h +++ b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h @@ -25,8 +25,8 @@ public: String dict_attributes; }; - static constexpr inline auto PING_HANDLER = "/extdict_ping"; - static constexpr inline auto MAIN_HANDLER = "/extdict_request"; + static constexpr auto PING_HANDLER = "/extdict_ping"; + static constexpr auto MAIN_HANDLER = "/extdict_request"; ExternalDictionaryLibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_, const LibraryInitData & library_data_); @@ -62,14 +62,14 @@ protected: ReadWriteBufferFromHTTP::OutStreamCallback getInitLibraryCallback() const; private: - static constexpr inline auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew"; - static constexpr inline auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone"; - static constexpr inline auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete"; - static constexpr inline auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll"; - static constexpr inline auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds"; - static constexpr inline auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys"; - static constexpr inline auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified"; - static constexpr inline auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad"; + static constexpr auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew"; + static constexpr auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone"; + static constexpr auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete"; + static constexpr auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll"; + static constexpr auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds"; + static constexpr auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys"; + static constexpr auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified"; + static constexpr auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad"; Poco::URI createRequestURI(const String & method) const; diff --git a/src/BridgeHelper/IBridgeHelper.h b/src/BridgeHelper/IBridgeHelper.h index 6812bd04a03..8ce1c0e143a 100644 --- a/src/BridgeHelper/IBridgeHelper.h +++ b/src/BridgeHelper/IBridgeHelper.h @@ -16,9 +16,9 @@ class IBridgeHelper: protected WithContext { public: - static constexpr inline auto DEFAULT_HOST = "127.0.0.1"; - static constexpr inline auto DEFAULT_FORMAT = "RowBinary"; - static constexpr inline auto PING_OK_ANSWER = "Ok."; + static constexpr auto DEFAULT_HOST = "127.0.0.1"; + static constexpr auto DEFAULT_FORMAT = "RowBinary"; + static constexpr auto PING_OK_ANSWER = "Ok."; static const inline std::string PING_METHOD = Poco::Net::HTTPRequest::HTTP_GET; static const inline std::string MAIN_METHOD = Poco::Net::HTTPRequest::HTTP_POST; diff --git a/src/BridgeHelper/LibraryBridgeHelper.h b/src/BridgeHelper/LibraryBridgeHelper.h index 8940f9d1c9e..0c56fe7a221 100644 --- a/src/BridgeHelper/LibraryBridgeHelper.h +++ b/src/BridgeHelper/LibraryBridgeHelper.h @@ -37,7 +37,7 @@ protected: Poco::URI createBaseURI() const override; - static constexpr inline size_t DEFAULT_PORT = 9012; + static constexpr size_t DEFAULT_PORT = 9012; const Poco::Util::AbstractConfiguration & config; LoggerPtr log; diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index b557e12b85b..5f4c7fd8381 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ b/src/BridgeHelper/XDBCBridgeHelper.h @@ -52,12 +52,12 @@ 
class XDBCBridgeHelper : public IXDBCBridgeHelper { public: - static constexpr inline auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT; - static constexpr inline auto PING_HANDLER = "/ping"; - static constexpr inline auto MAIN_HANDLER = "/"; - static constexpr inline auto COL_INFO_HANDLER = "/columns_info"; - static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; - static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed"; + static constexpr auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT; + static constexpr auto PING_HANDLER = "/ping"; + static constexpr auto MAIN_HANDLER = "/"; + static constexpr auto COL_INFO_HANDLER = "/columns_info"; + static constexpr auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; + static constexpr auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed"; XDBCBridgeHelper( ContextPtr context_, @@ -256,7 +256,7 @@ protected: struct JDBCBridgeMixin { - static constexpr inline auto DEFAULT_PORT = 9019; + static constexpr auto DEFAULT_PORT = 9019; static String configPrefix() { @@ -287,7 +287,7 @@ struct JDBCBridgeMixin struct ODBCBridgeMixin { - static constexpr inline auto DEFAULT_PORT = 9018; + static constexpr auto DEFAULT_PORT = 9018; static String configPrefix() { diff --git a/src/Common/CPUID.h b/src/Common/CPUID.h index d7a714ec5af..b49f7706904 100644 --- a/src/Common/CPUID.h +++ b/src/Common/CPUID.h @@ -69,9 +69,9 @@ union CPUInfo UInt32 edx; } registers; - inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } + explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } - inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } + CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } }; inline bool haveRDTSCP() noexcept diff --git a/src/Common/ColumnsHashingImpl.h b/src/Common/ColumnsHashingImpl.h index f74a56292ae..0e013decf1f 100644 --- a/src/Common/ColumnsHashingImpl.h +++ b/src/Common/ColumnsHashingImpl.h @@ -453,7 +453,7 @@ protected: /// Return the columns which actually contain the values of the keys. /// For a given key column, if it is nullable, we return its nested /// column. Otherwise we return the key column itself. - inline const ColumnRawPtrs & getActualColumns() const + const ColumnRawPtrs & getActualColumns() const { return actual_columns; } diff --git a/src/Common/CombinedCardinalityEstimator.h b/src/Common/CombinedCardinalityEstimator.h index 0e53755d773..132f00de8eb 100644 --- a/src/Common/CombinedCardinalityEstimator.h +++ b/src/Common/CombinedCardinalityEstimator.h @@ -292,13 +292,13 @@ private: } template - inline T & getContainer() + T & getContainer() { return *reinterpret_cast(address & mask); } template - inline const T & getContainer() const + const T & getContainer() const { return *reinterpret_cast(address & mask); } @@ -309,7 +309,7 @@ private: address |= static_cast(t); } - inline details::ContainerType getContainerType() const + details::ContainerType getContainerType() const { return static_cast(address & ~mask); } diff --git a/src/Common/CompactArray.h b/src/Common/CompactArray.h index 613dc3d0b90..7b2bd658d2e 100644 --- a/src/Common/CompactArray.h +++ b/src/Common/CompactArray.h @@ -116,7 +116,7 @@ public: /** Return the current cell number and the corresponding content. 
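Throws NO_AVAILABLE_DATA if no cell has been read yet or the end of the data was reached.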
*/ - inline std::pair get() const + std::pair get() const { if ((current_bucket_index == 0) || is_eof) throw Exception(ErrorCodes::NO_AVAILABLE_DATA, "No available data."); diff --git a/src/Common/CounterInFile.h b/src/Common/CounterInFile.h index 854bf7cc675..0a11e52be2c 100644 --- a/src/Common/CounterInFile.h +++ b/src/Common/CounterInFile.h @@ -37,7 +37,7 @@ namespace fs = std::filesystem; class CounterInFile { private: - static inline constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16; + static constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16; public: /// path - the name of the file, including the path diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index e2b627a7f29..8dade8c6fd5 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -62,9 +62,9 @@ public: static void updatePerformanceCountersIfNeeded(); static ProfileEvents::Counters & getProfileEvents(); - inline ALWAYS_INLINE static MemoryTracker * getMemoryTracker() + static MemoryTracker * getMemoryTracker() { - if (unlikely(!current_thread)) + if (!current_thread) [[unlikely]] return nullptr; return ¤t_thread->memory_tracker; } diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index 49675aaafbc..8f6ec1604ee 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -261,7 +261,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 9050b7ef6d7..a600f57b06a 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -844,7 +844,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); diff --git a/src/Common/HashTable/PackedHashMap.h b/src/Common/HashTable/PackedHashMap.h index 0d25addb58e..72eb721b274 100644 --- a/src/Common/HashTable/PackedHashMap.h +++ b/src/Common/HashTable/PackedHashMap.h @@ -69,7 +69,7 @@ struct PackedHashMapCell : public HashMapCellvalue.first, state); } static bool isZero(const Key key, const State & /*state*/) { return ZeroTraits::check(key); } - static inline bool bitEqualsByValue(key_type a, key_type b) { return a == b; } + static bool bitEqualsByValue(key_type a, key_type b) { return a == b; } template auto get() const diff --git a/src/Common/HashTable/SmallTable.h b/src/Common/HashTable/SmallTable.h index 3229e4748ea..63a6b932dd0 100644 --- a/src/Common/HashTable/SmallTable.h +++ b/src/Common/HashTable/SmallTable.h @@ -112,7 +112,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); diff --git a/src/Common/HyperLogLogCounter.h b/src/Common/HyperLogLogCounter.h index bacd4cc7288..9b2b33dc918 100644 --- a/src/Common/HyperLogLogCounter.h +++ b/src/Common/HyperLogLogCounter.h @@ -128,13 +128,13 @@ public: { } - inline void update(UInt8 cur_rank, UInt8 new_rank) + void update(UInt8 cur_rank, UInt8 new_rank) { denominator -= static_cast(1.0) / (1ULL << cur_rank); denominator += static_cast(1.0) / (1ULL << new_rank); } - inline void update(UInt8 rank) + void update(UInt8 rank) { denominator += static_cast(1.0) / (1ULL 
<< rank); } @@ -166,13 +166,13 @@ public: rank_count[0] = static_cast(initial_value); } - inline void update(UInt8 cur_rank, UInt8 new_rank) + void update(UInt8 cur_rank, UInt8 new_rank) { --rank_count[cur_rank]; ++rank_count[new_rank]; } - inline void update(UInt8 rank) + void update(UInt8 rank) { ++rank_count[rank]; } @@ -429,13 +429,13 @@ public: private: /// Extract subset of bits in [begin, end[ range. - inline HashValueType extractBitSequence(HashValueType val, UInt8 begin, UInt8 end) const + HashValueType extractBitSequence(HashValueType val, UInt8 begin, UInt8 end) const { return (val >> begin) & ((1ULL << (end - begin)) - 1); } /// Rank is number of trailing zeros. - inline UInt8 calculateRank(HashValueType val) const + UInt8 calculateRank(HashValueType val) const { if (unlikely(val == 0)) return max_rank; @@ -448,7 +448,7 @@ private: return zeros_plus_one; } - inline HashValueType getHash(Value key) const + HashValueType getHash(Value key) const { /// NOTE: this should be OK, since value is the same as key for HLL. return static_cast( @@ -496,7 +496,7 @@ private: throw Poco::Exception("Internal error", DB::ErrorCodes::LOGICAL_ERROR); } - inline double applyCorrection(double raw_estimate) const + double applyCorrection(double raw_estimate) const { double fixed_estimate; @@ -525,7 +525,7 @@ private: /// Correction used in HyperLogLog++ algorithm. /// Source: "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm" /// (S. Heule et al., Proceedings of the EDBT 2013 Conference). - inline double applyBiasCorrection(double raw_estimate) const + double applyBiasCorrection(double raw_estimate) const { double fixed_estimate; @@ -540,7 +540,7 @@ private: /// Calculation of unique values using LinearCounting algorithm. /// Source: "A Linear-time Probabilistic Counting Algorithm for Database Applications" /// (Whang et al., ACM Trans. Database Syst., pp. 208-229, 1990). - inline double applyLinearCorrection(double raw_estimate) const + double applyLinearCorrection(double raw_estimate) const { double fixed_estimate; diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h index fbd1de3197e..db7f5238921 100644 --- a/src/Common/IntervalTree.h +++ b/src/Common/IntervalTree.h @@ -23,7 +23,7 @@ struct Interval Interval(IntervalStorageType left_, IntervalStorageType right_) : left(left_), right(right_) { } - inline bool contains(IntervalStorageType point) const { return left <= point && point <= right; } + bool contains(IntervalStorageType point) const { return left <= point && point <= right; } }; template @@ -290,7 +290,7 @@ private: IntervalStorageType middle_element; - inline bool hasValue() const { return sorted_intervals_range_size != 0; } + bool hasValue() const { return sorted_intervals_range_size != 0; } }; using IntervalWithEmptyValue = Interval; @@ -585,7 +585,7 @@ private: } } - inline size_t findFirstIteratorNodeIndex() const + size_t findFirstIteratorNodeIndex() const { size_t nodes_size = nodes.size(); size_t result_index = 0; @@ -602,7 +602,7 @@ private: return result_index; } - inline size_t findLastIteratorNodeIndex() const + size_t findLastIteratorNodeIndex() const { if (unlikely(nodes.empty())) return 0; @@ -618,7 +618,7 @@ private: return result_index; } - inline void increaseIntervalsSize() + void increaseIntervalsSize() { /// Before tree is build we store all intervals size in our first node to allow tree iteration. 
++intervals_size; @@ -630,7 +630,7 @@ private: size_t intervals_size = 0; bool tree_is_built = false; - static inline const Interval & getInterval(const IntervalWithValue & interval_with_value) + static const Interval & getInterval(const IntervalWithValue & interval_with_value) { if constexpr (is_empty_value) return interval_with_value; @@ -639,7 +639,7 @@ private: } template - static inline bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback) + static bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback) { if constexpr (is_empty_value) return callback(interval); @@ -647,7 +647,7 @@ private: return callback(interval.first, interval.second); } - static inline void + static void intervalsToPoints(const std::vector & intervals, std::vector & temporary_points_storage) { for (const auto & interval_with_value : intervals) @@ -658,7 +658,7 @@ private: } } - static inline IntervalStorageType pointsMedian(std::vector & points) + static IntervalStorageType pointsMedian(std::vector & points) { size_t size = points.size(); size_t middle_element_index = size / 2; diff --git a/src/Common/JSONParsers/SimdJSONParser.h b/src/Common/JSONParsers/SimdJSONParser.h index a8594710d20..827d142266a 100644 --- a/src/Common/JSONParsers/SimdJSONParser.h +++ b/src/Common/JSONParsers/SimdJSONParser.h @@ -26,62 +26,62 @@ class SimdJSONBasicFormatter { public: explicit SimdJSONBasicFormatter(PaddedPODArray & buffer_) : buffer(buffer_) {} - inline void comma() { oneChar(','); } + void comma() { oneChar(','); } /** Start an array, prints [ **/ - inline void startArray() { oneChar('['); } + void startArray() { oneChar('['); } /** End an array, prints ] **/ - inline void endArray() { oneChar(']'); } + void endArray() { oneChar(']'); } /** Start an array, prints { **/ - inline void startObject() { oneChar('{'); } + void startObject() { oneChar('{'); } /** Start an array, prints } **/ - inline void endObject() { oneChar('}'); } + void endObject() { oneChar('}'); } /** Prints a true **/ - inline void trueAtom() + void trueAtom() { const char * s = "true"; buffer.insert(s, s + 4); } /** Prints a false **/ - inline void falseAtom() + void falseAtom() { const char * s = "false"; buffer.insert(s, s + 5); } /** Prints a null **/ - inline void nullAtom() + void nullAtom() { const char * s = "null"; buffer.insert(s, s + 4); } /** Prints a number **/ - inline void number(int64_t x) + void number(int64_t x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a number **/ - inline void number(uint64_t x) + void number(uint64_t x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a number **/ - inline void number(double x) + void number(double x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a key (string + colon) **/ - inline void key(std::string_view unescaped) + void key(std::string_view unescaped) { string(unescaped); oneChar(':'); } /** Prints a string. The string is escaped as needed. 
**/ - inline void string(std::string_view unescaped) + void string(std::string_view unescaped) { oneChar('\"'); size_t i = 0; @@ -165,7 +165,7 @@ public: oneChar('\"'); } - inline void oneChar(char c) + void oneChar(char c) { buffer.push_back(c); } @@ -182,7 +182,7 @@ class SimdJSONElementFormatter public: explicit SimdJSONElementFormatter(PaddedPODArray & buffer_) : format(buffer_) {} /** Append an element to the builder (to be printed) **/ - inline void append(simdjson::dom::element value) + void append(simdjson::dom::element value) { switch (value.type()) { @@ -224,7 +224,7 @@ public: } } /** Append an array to the builder (to be printed) **/ - inline void append(simdjson::dom::array value) + void append(simdjson::dom::array value) { format.startArray(); auto iter = value.begin(); @@ -241,7 +241,7 @@ public: format.endArray(); } - inline void append(simdjson::dom::object value) + void append(simdjson::dom::object value) { format.startObject(); auto pair = value.begin(); @@ -258,7 +258,7 @@ public: format.endObject(); } - inline void append(simdjson::dom::key_value_pair kv) + void append(simdjson::dom::key_value_pair kv) { format.key(kv.key); append(kv.value); diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index b4069027ad1..ece5114a998 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -284,7 +284,7 @@ public: } template - inline void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) + void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) { #if !defined(NDEBUG) const char * ptr_begin = reinterpret_cast(&*from_begin); diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index d6fc1656eca..fb0c75e7c95 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -174,7 +174,7 @@ public: items.emplace_back(std::make_shared(allocObject(), *this)); } - inline size_t size() + size_t size() { std::lock_guard lock(mutex); return items.size(); diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index a30e19d8212..238321ec76e 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -385,7 +385,7 @@ private: * PASS is counted from least significant (0), so the first pass is NUM_PASSES - 1. */ template - static inline void radixSortMSDInternal(Element * arr, size_t size, size_t limit) + static void radixSortMSDInternal(Element * arr, size_t size, size_t limit) { /// The beginning of every i-1-th bucket. 0th element will be equal to 1st. /// Last element will point to array end. 
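/// A minimal sketch of why every removal in these hunks is a no-op: per the
/// C++ standard ([class.mfct], [dcl.inline]), a function defined inside a
/// class definition is implicitly inline, so the explicit keyword only
/// restated the default. Hypothetical demo type, self-contained and
/// compilable as-is:
struct ImplicitInlineDemo
{
    int implicitly() const { return x; }          /// already inline by virtue of in-class definition
    inline int redundantly() const { return x; }  /// identical linkage; the keyword adds nothing
    int x = 0;
};
/// Both members get the same one-definition-rule exemption, which is why the
/// keyword can be stripped without affecting codegen, linkage, or headers.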
@@ -528,7 +528,7 @@ private: // A helper to choose sorting algorithm based on array length template - static inline void radixSortMSDInternalHelper(Element * arr, size_t size, size_t limit) + static void radixSortMSDInternalHelper(Element * arr, size_t size, size_t limit) { if (size <= INSERTION_SORT_THRESHOLD) insertionSortInternal(arr, size); diff --git a/src/Common/SpaceSaving.h b/src/Common/SpaceSaving.h index 7a740ae6c9b..81ac4e71e8c 100644 --- a/src/Common/SpaceSaving.h +++ b/src/Common/SpaceSaving.h @@ -131,12 +131,12 @@ public: ~SpaceSaving() { destroyElements(); } - inline size_t size() const + size_t size() const { return counter_list.size(); } - inline size_t capacity() const + size_t capacity() const { return m_capacity; } diff --git a/src/Common/ThreadProfileEvents.h b/src/Common/ThreadProfileEvents.h index 26aeab08302..0af3ccb4c80 100644 --- a/src/Common/ThreadProfileEvents.h +++ b/src/Common/ThreadProfileEvents.h @@ -107,7 +107,7 @@ struct RUsageCounters } private: - static inline UInt64 getClockMonotonic() + static UInt64 getClockMonotonic() { struct timespec ts; if (0 != clock_gettime(CLOCK_MONOTONIC, &ts)) diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 6513bdb8bc3..9c2852e4a10 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -54,16 +54,16 @@ namespace VolnitskyTraits /// min haystack size to use main algorithm instead of fallback static constexpr size_t min_haystack_size_for_algorithm = 20000; - static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0) + static bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0) { return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits::max() || (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm); } - static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad(pos); } + static Ngram toNGram(const UInt8 * const pos) { return unalignedLoad(pos); } template - static inline bool putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase) + static bool putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase) { struct Chars { @@ -115,7 +115,7 @@ namespace VolnitskyTraits } template - static inline bool putNGramUTF8CaseInsensitive( + static bool putNGramUTF8CaseInsensitive( const UInt8 * pos, int offset, const UInt8 * begin, size_t size, Callback && putNGramBase) { const UInt8 * end = begin + size; @@ -349,7 +349,7 @@ namespace VolnitskyTraits } template - static inline bool putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase) + static bool putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase) { if constexpr (CaseSensitive) { @@ -580,7 +580,7 @@ public: return true; } - inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const + bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const { const size_t fallback_size = fallback_needles.size(); for (size_t i = 0; i < fallback_size; ++i) @@ -609,7 +609,7 @@ public: return false; } - inline size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const + size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const { const size_t fallback_size = fallback_needles.size(); @@ -647,7 +647,7 @@ public: } template - inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 
* haystack_end, const CountCharsCallback & count_chars) const + UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & count_chars) const { const size_t fallback_size = fallback_needles.size(); @@ -682,7 +682,7 @@ public: } template - inline void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * answer, const CountCharsCallback & count_chars) const + void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * answer, const CountCharsCallback & count_chars) const { const size_t fallback_size = fallback_needles.size(); for (size_t i = 0; i < fallback_size; ++i) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index ec49c94808e..ddd30c4eef2 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -491,12 +491,12 @@ public: incrementErrorMetrics(code); } - inline static Exception createDeprecated(const std::string & msg, Error code_) + static Exception createDeprecated(const std::string & msg, Error code_) { return Exception(msg, code_, 0); } - inline static Exception fromPath(Error code_, const std::string & path) + static Exception fromPath(Error code_, const std::string & path) { return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path); } @@ -504,7 +504,7 @@ public: /// Message must be a compile-time constant template requires std::is_convertible_v - inline static Exception fromMessage(Error code_, T && message) + static Exception fromMessage(Error code_, T && message) { return Exception(std::forward(message), code_); } diff --git a/src/Common/findExtreme.cpp b/src/Common/findExtreme.cpp index ce3bbb86d7c..a99b1f2dd3d 100644 --- a/src/Common/findExtreme.cpp +++ b/src/Common/findExtreme.cpp @@ -11,13 +11,13 @@ namespace DB template struct MinComparator { - static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::min(a, b); } + static ALWAYS_INLINE const T & cmp(const T & a, const T & b) { return std::min(a, b); } }; template struct MaxComparator { - static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::max(a, b); } + static ALWAYS_INLINE const T & cmp(const T & a, const T & b) { return std::max(a, b); } }; MULTITARGET_FUNCTION_AVX2_SSE42( diff --git a/src/Core/Field.h b/src/Core/Field.h index 4424d669c4d..73d3f4ec44e 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -855,13 +855,13 @@ template <> struct Field::EnumToType { usi template <> struct Field::EnumToType { using Type = CustomType; }; template <> struct Field::EnumToType { using Type = UInt64; }; -inline constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) +constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) { return t == Field::Types::Int64 || t == Field::Types::UInt64; } -inline constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) +constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) { return t == Field::Types::Int64 || t == Field::Types::UInt64 diff --git a/src/Core/Joins.h b/src/Core/Joins.h index ccdd6eefab7..96d2b51325c 100644 --- a/src/Core/Joins.h +++ b/src/Core/Joins.h @@ -19,16 +19,16 @@ enum class JoinKind : uint8_t const char * toString(JoinKind kind); -inline constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; } -inline constexpr bool isRight(JoinKind kind) { return kind == JoinKind::Right; } -inline constexpr bool isInner(JoinKind kind) { return kind == JoinKind::Inner; } -inline constexpr bool isFull(JoinKind 
kind) { return kind == JoinKind::Full; } -inline constexpr bool isCrossOrComma(JoinKind kind) { return kind == JoinKind::Comma || kind == JoinKind::Cross; } -inline constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::Right || kind == JoinKind::Full; } -inline constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; } -inline constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; } -inline constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; } -inline constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; } +constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; } +constexpr bool isRight(JoinKind kind) { return kind == JoinKind::Right; } +constexpr bool isInner(JoinKind kind) { return kind == JoinKind::Inner; } +constexpr bool isFull(JoinKind kind) { return kind == JoinKind::Full; } +constexpr bool isCrossOrComma(JoinKind kind) { return kind == JoinKind::Comma || kind == JoinKind::Cross; } +constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::Right || kind == JoinKind::Full; } +constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; } +constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; } +constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; } +constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; } /// Allows more optimal JOIN for typical cases. enum class JoinStrictness : uint8_t @@ -66,7 +66,7 @@ enum class ASOFJoinInequality : uint8_t const char * toString(ASOFJoinInequality asof_join_inequality); -inline constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_name) +constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_name) { ASOFJoinInequality inequality = ASOFJoinInequality::None; @@ -82,7 +82,7 @@ inline constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_ return inequality; } -inline constexpr ASOFJoinInequality reverseASOFJoinInequality(ASOFJoinInequality inequality) +constexpr ASOFJoinInequality reverseASOFJoinInequality(ASOFJoinInequality inequality) { if (inequality == ASOFJoinInequality::Less) return ASOFJoinInequality::Greater; diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index a0f47c44460..3d34d404595 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -40,7 +40,7 @@ class BaseDaemon : public Poco::Util::ServerApplication, public Loggers friend class SignalListener; public: - static inline constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite"; + static constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite"; BaseDaemon(); ~BaseDaemon() override; diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index 642d2de833f..997c554059b 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -147,7 +147,7 @@ public: static T getScaleMultiplier(UInt32 scale); - inline DecimalUtils::DataTypeDecimalTrait getTrait() const + DecimalUtils::DataTypeDecimalTrait getTrait() const { return {precision, scale}; } diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 01217c58e31..a960a916027 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ 
-754,7 +754,7 @@ private: std::vector attributes; - inline void setCellDeadline(Cell & cell, TimePoint now) + void setCellDeadline(Cell & cell, TimePoint now) { if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0) { @@ -774,7 +774,7 @@ private: cell.deadline = std::chrono::system_clock::to_time_t(deadline); } - inline size_t getCellIndex(const KeyType key) const + size_t getCellIndex(const KeyType key) const { const size_t hash = DefaultHash()(key); const size_t index = hash & size_overlap_mask; @@ -783,7 +783,7 @@ private: using KeyStateAndCellIndex = std::pair; - inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const + KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const { size_t place_value = getCellIndex(key); const size_t place_value_end = place_value + max_collision_length; @@ -810,7 +810,7 @@ private: return std::make_pair(KeyState::not_found, place_value & size_overlap_mask); } - inline size_t getCellIndexForInsert(const KeyType & key) const + size_t getCellIndexForInsert(const KeyType & key) const { size_t place_value = getCellIndex(key); const size_t place_value_end = place_value + max_collision_length; diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 8bf190d3edc..64fc05e99ab 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -44,7 +44,7 @@ public: { } - inline bool isConstant() const { return default_values_column == nullptr; } + bool isConstant() const { return default_values_column == nullptr; } Field getDefaultValue(size_t row) const { @@ -450,17 +450,17 @@ public: keys_size = key_columns.front()->size(); } - inline size_t getKeysSize() const + size_t getKeysSize() const { return keys_size; } - inline size_t getCurrentKeyIndex() const + size_t getCurrentKeyIndex() const { return current_key_index; } - inline KeyType extractCurrentKey() + KeyType extractCurrentKey() { assert(current_key_index < keys_size); diff --git a/src/Dictionaries/Embedded/RegionsNames.h b/src/Dictionaries/Embedded/RegionsNames.h index 0053c74745a..0e4c1fe8b88 100644 --- a/src/Dictionaries/Embedded/RegionsNames.h +++ b/src/Dictionaries/Embedded/RegionsNames.h @@ -48,14 +48,14 @@ public: }; private: - static inline constexpr const char * languages[] = + static constexpr const char * languages[] = { #define M(NAME, FALLBACK, NUM) #NAME, FOR_EACH_LANGUAGE(M) #undef M }; - static inline constexpr Language fallbacks[] = + static constexpr Language fallbacks[] = { #define M(NAME, FALLBACK, NUM) Language::FALLBACK, FOR_EACH_LANGUAGE(M) diff --git a/src/Dictionaries/ICacheDictionaryStorage.h b/src/Dictionaries/ICacheDictionaryStorage.h index dcd7434946f..532154cd190 100644 --- a/src/Dictionaries/ICacheDictionaryStorage.h +++ b/src/Dictionaries/ICacheDictionaryStorage.h @@ -26,15 +26,15 @@ struct KeyState : state(state_) {} - inline bool isFound() const { return state == State::found; } - inline bool isExpired() const { return state == State::expired; } - inline bool isNotFound() const { return state == State::not_found; } - inline bool isDefault() const { return is_default; } - inline void setDefault() { is_default = true; } - inline void setDefaultValue(bool is_default_value) { is_default = is_default_value; } + bool isFound() const { return state == State::found; } + bool isExpired() const { return state == State::expired; } + bool isNotFound() const { return state == State::not_found; } + bool isDefault() const { return 
is_default; } + void setDefault() { is_default = true; } + void setDefaultValue(bool is_default_value) { is_default = is_default_value; } /// Valid only if keyState is found or expired - inline size_t getFetchedColumnIndex() const { return fetched_column_index; } - inline void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; } + size_t getFetchedColumnIndex() const { return fetched_column_index; } + void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; } private: State state = not_found; size_t fetched_column_index = 0; diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 1bc6d16c932..a67118caaf8 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -66,7 +66,7 @@ namespace return buf; } - inline UInt8 prefixIPv6() const + UInt8 prefixIPv6() const { return isv6 ? prefix : prefix + 96; } diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 2e93a8e6001..ab999202e42 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -474,7 +474,7 @@ public: } // Checks if no more values can be added for a given attribute - inline bool full(const String & attr_name, std::unordered_set * const defaults = nullptr) const + bool full(const String & attr_name, std::unordered_set * const defaults = nullptr) const { if (collect_values_limit) { @@ -490,7 +490,7 @@ public: } // Returns the number of full attributes - inline size_t attributesFull() const { return n_full_attributes; } + size_t attributesFull() const { return n_full_attributes; } }; std::pair processBackRefs(const String & data, const re2::RE2 & searcher, const std::vector & pieces) diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index e3eea71cd9a..cb0ade9b899 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -134,7 +134,7 @@ public: /// Reset block with new block_data /// block_data must be filled with zeroes if it is new block - inline void reset(char * new_block_data) + void reset(char * new_block_data) { block_data = new_block_data; current_block_offset = block_header_size; @@ -142,13 +142,13 @@ public: } /// Check if it is enough place to write key in block - inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const + bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const { return (current_block_offset + (sizeof(cache_key.key) + sizeof(cache_key.size) + cache_key.size)) <= block_size; } /// Check if it is enough place to write key in block - inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const + bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const { const StringRef & key = cache_key.key; size_t complex_key_size = sizeof(key.size) + key.size; @@ -159,7 +159,7 @@ public: /// Write key and returns offset in ssd cache block where data is written /// It is client responsibility to check if there is enough place in block to write key /// Returns true if key was written and false if there was not enough place to write key - inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block) + bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block) { assert(cache_key.size > 0); @@ -188,7 +188,7 @@ 
public: return true; } - inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block) + bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block) { assert(cache_key.size > 0); @@ -223,20 +223,20 @@ public: return true; } - inline size_t getKeysSize() const { return keys_size; } + size_t getKeysSize() const { return keys_size; } /// Write keys size into block header - inline void writeKeysSize() + void writeKeysSize() { char * keys_size_offset_data = block_data + block_header_check_sum_size; std::memcpy(keys_size_offset_data, &keys_size, sizeof(size_t)); } /// Get check sum from block header - inline size_t getCheckSum() const { return unalignedLoad(block_data); } + size_t getCheckSum() const { return unalignedLoad(block_data); } /// Calculate check sum in block - inline size_t calculateCheckSum() const + size_t calculateCheckSum() const { size_t calculated_check_sum = static_cast(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size)); @@ -244,7 +244,7 @@ public: } /// Check if check sum from block header matched calculated check sum in block - inline bool checkCheckSum() const + bool checkCheckSum() const { size_t calculated_check_sum = calculateCheckSum(); size_t check_sum = getCheckSum(); @@ -253,16 +253,16 @@ public: } /// Write check sum in block header - inline void writeCheckSum() + void writeCheckSum() { size_t check_sum = static_cast(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size)); std::memcpy(block_data, &check_sum, sizeof(size_t)); } - inline size_t getBlockSize() const { return block_size; } + size_t getBlockSize() const { return block_size; } /// Returns block data - inline char * getBlockData() const { return block_data; } + char * getBlockData() const { return block_data; } /// Read keys that were serialized in block /// It is client responsibility to ensure that simple or complex keys were written in block @@ -405,16 +405,16 @@ public: current_write_block.writeCheckSum(); } - inline char * getPlace(SSDCacheIndex index) const + char * getPlace(SSDCacheIndex index) const { return buffer.m_data + index.block_index * block_size + index.offset_in_block; } - inline size_t getCurrentBlockIndex() const { return current_block_index; } + size_t getCurrentBlockIndex() const { return current_block_index; } - inline const char * getData() const { return buffer.m_data; } + const char * getData() const { return buffer.m_data; } - inline size_t getSizeInBytes() const { return block_size * partition_blocks_size; } + size_t getSizeInBytes() const { return block_size * partition_blocks_size; } void readKeys(PaddedPODArray & keys) const { @@ -431,7 +431,7 @@ public: } } - inline void reset() + void reset() { current_block_index = 0; current_write_block.reset(buffer.m_data); @@ -751,9 +751,9 @@ public: } } - inline size_t getCurrentBlockIndex() const { return current_block_index; } + size_t getCurrentBlockIndex() const { return current_block_index; } - inline void reset() + void reset() { current_block_index = 0; } @@ -789,7 +789,7 @@ private: int fd = -1; }; - inline static int preallocateDiskSpace(int fd, size_t offset, size_t len) + static int preallocateDiskSpace(int fd, size_t offset, size_t len) { #if defined(OS_FREEBSD) return posix_fallocate(fd, offset, len); @@ -798,7 +798,7 @@ private: #endif } - inline static char * getRequestBuffer(const iocb & request) + static char * getRequestBuffer(const iocb & request) { char * 
result = nullptr; @@ -811,7 +811,7 @@ private: return result; } - inline static ssize_t eventResult(io_event & event) + static ssize_t eventResult(io_event & event) { ssize_t bytes_written; @@ -986,9 +986,9 @@ private: size_t in_memory_partition_index; CellState state; - inline bool isInMemory() const { return state == in_memory; } - inline bool isOnDisk() const { return state == on_disk; } - inline bool isDefaultValue() const { return state == default_value; } + bool isInMemory() const { return state == in_memory; } + bool isOnDisk() const { return state == on_disk; } + bool isDefaultValue() const { return state == default_value; } }; struct KeyToBlockOffset @@ -1367,7 +1367,7 @@ private: } } - inline void setCellDeadline(Cell & cell, TimePoint now) + void setCellDeadline(Cell & cell, TimePoint now) { if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0) { @@ -1384,7 +1384,7 @@ private: cell.deadline = std::chrono::system_clock::to_time_t(deadline); } - inline void eraseKeyFromIndex(KeyType key) + void eraseKeyFromIndex(KeyType key) { auto it = index.find(key); diff --git a/src/Disks/IO/IOUringReader.h b/src/Disks/IO/IOUringReader.h index 89e71e4b215..359b3badc45 100644 --- a/src/Disks/IO/IOUringReader.h +++ b/src/Disks/IO/IOUringReader.h @@ -61,12 +61,12 @@ private: void monitorRing(); - template inline void failPromise(std::promise & promise, const Exception & ex) + template void failPromise(std::promise & promise, const Exception & ex) { promise.set_exception(std::make_exception_ptr(ex)); } - inline std::future makeFailedResult(const Exception & ex) + std::future makeFailedResult(const Exception & ex) { auto promise = std::promise{}; failPromise(promise, ex); diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index ff07309e248..7fd5b7476e1 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -68,7 +68,7 @@ struct DivideIntegralImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { using CastA = std::conditional_t && std::is_same_v, uint8_t, A>; using CastB = std::conditional_t && std::is_same_v, uint8_t, B>; @@ -120,7 +120,7 @@ struct ModuloImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if constexpr (std::is_floating_point_v) { @@ -175,7 +175,7 @@ struct PositiveModuloImpl : ModuloImpl using ResultType = typename NumberTraits::ResultOfPositiveModulo::Type; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { auto res = ModuloImpl::template apply(a, b); if constexpr (is_signed_v) diff --git a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h index aa0e1b04835..5b8fa41958a 100644 --- a/src/Functions/ExtractString.h +++ b/src/Functions/ExtractString.h @@ -20,7 +20,7 @@ namespace DB // includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word struct ExtractStringImpl { - static ALWAYS_INLINE inline const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) + static ALWAYS_INLINE const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) { // jump separators while (pos < end && isUTF8Sep(*pos)) @@ -35,10 +35,10 @@ struct ExtractStringImpl } // we use ASCII non-alphanum character as UTF8 separator - static ALWAYS_INLINE inline bool isUTF8Sep(const UInt8 c) { return c < 128 && !isAlphaNumericASCII(c); } + static ALWAYS_INLINE bool isUTF8Sep(const UInt8 c) { return c 
< 128 && !isAlphaNumericASCII(c); } // read one UTF8 character - static ALWAYS_INLINE inline void readOneUTF8Code(const UInt8 *& pos, const UInt8 * end) + static ALWAYS_INLINE void readOneUTF8Code(const UInt8 *& pos, const UInt8 * end) { size_t length = UTF8::seqLength(*pos); diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 6203999fa37..5d19ba44d9b 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -284,7 +284,7 @@ struct BinaryOperation private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); @@ -432,7 +432,7 @@ template struct FixedStringReduceOperationImpl { template - static void inline process(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt16 * __restrict result, size_t size, size_t N) + static void process(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt16 * __restrict result, size_t size, size_t N) { if constexpr (op_case == OpCase::Vector) vectorVector(a, b, result, size, N); @@ -503,7 +503,7 @@ struct StringReduceOperationImpl } } - static inline UInt64 constConst(std::string_view a, std::string_view b) + static UInt64 constConst(std::string_view a, std::string_view b) { return process( reinterpret_cast(a.data()), @@ -643,7 +643,7 @@ public: private: template - static inline void processWithRightNullmapImpl(const auto & a, const auto & b, ResultContainerType & c, size_t size, const NullMap * right_nullmap, ApplyFunc apply_func) + static void processWithRightNullmapImpl(const auto & a, const auto & b, ResultContainerType & c, size_t size, const NullMap * right_nullmap, ApplyFunc apply_func) { if (right_nullmap) { diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 37db514fd1f..83ed874c47b 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -44,27 +44,27 @@ class DefaultJSONStringSerializer public: explicit DefaultJSONStringSerializer(ColumnString & col_str_) : col_str(col_str_) { } - inline void addRawData(const char * ptr, size_t len) + void addRawData(const char * ptr, size_t len) { out << std::string_view(ptr, len); } - inline void addRawString(std::string_view str) + void addRawString(std::string_view str) { out << str; } /// serialize the json element into stringstream - inline void addElement(const Element & element) + void addElement(const Element & element) { out << element.getElement(); } - inline void commit() + void commit() { auto out_str = out.str(); col_str.insertData(out_str.data(), out_str.size()); } - inline void rollback() {} + void rollback() {} private: ColumnString & col_str; std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -82,27 +82,27 @@ public: prev_offset = offsets.empty() ? 0 : offsets.back(); } /// Put the data into column's buffer directly. 
- inline void addRawData(const char * ptr, size_t len) + void addRawData(const char * ptr, size_t len) { chars.insert(ptr, ptr + len); } - inline void addRawString(std::string_view str) + void addRawString(std::string_view str) { chars.insert(str.data(), str.data() + str.size()); } /// serialize the json element into column's buffer directly - inline void addElement(const Element & element) + void addElement(const Element & element) { formatter.append(element.getElement()); } - inline void commit() + void commit() { chars.push_back(0); offsets.push_back(chars.size()); } - inline void rollback() + void rollback() { chars.resize(prev_offset); } diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 14745460658..524b4f82acd 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -59,7 +59,7 @@ enum class CipherMode : uint8_t template struct KeyHolder { - inline StringRef setKey(size_t cipher_key_size, StringRef key) const + StringRef setKey(size_t cipher_key_size, StringRef key) const { if (key.size != cipher_key_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid key size: {} expected {}", key.size, cipher_key_size); @@ -71,7 +71,7 @@ struct KeyHolder template <> struct KeyHolder { - inline StringRef setKey(size_t cipher_key_size, StringRef key) + StringRef setKey(size_t cipher_key_size, StringRef key) { if (key.size < cipher_key_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid key size: {} expected {}", key.size, cipher_key_size); diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index 566ce16d1a7..adabda1a7f8 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -79,7 +79,7 @@ public: private: template - inline static void writeBitmask(T x, WriteBuffer & out) + static void writeBitmask(T x, WriteBuffer & out) { using UnsignedT = make_unsigned_t; UnsignedT u_x = x; diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index 54f7b6dd1f4..e01967274f4 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -785,7 +785,7 @@ private: #include - static inline void applyCIDRMask(const char * __restrict src, char * __restrict dst_lower, char * __restrict dst_upper, UInt8 bits_to_keep) + static void applyCIDRMask(const char * __restrict src, char * __restrict dst_lower, char * __restrict dst_upper, UInt8 bits_to_keep) { __m128i mask = _mm_loadu_si128(reinterpret_cast(getCIDRMaskIPv6(bits_to_keep).data())); __m128i lower = _mm_and_si128(_mm_loadu_si128(reinterpret_cast(src)), mask); @@ -916,7 +916,7 @@ public: class FunctionIPv4CIDRToRange : public IFunction { private: - static inline std::pair applyCIDRMask(UInt32 src, UInt8 bits_to_keep) + static std::pair applyCIDRMask(UInt32 src, UInt8 bits_to_keep) { if (bits_to_keep >= 8 * sizeof(UInt32)) return { src, src }; diff --git a/src/Functions/FunctionsConsistentHashing.h b/src/Functions/FunctionsConsistentHashing.h index 6f2eec5be98..306b6395dc5 100644 --- a/src/Functions/FunctionsConsistentHashing.h +++ b/src/Functions/FunctionsConsistentHashing.h @@ -83,7 +83,7 @@ private: using BucketsType = typename Impl::BucketsType; template - inline BucketsType checkBucketsRange(T buckets) const + BucketsType checkBucketsRange(T buckets) const { if (unlikely(buckets <= 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} (number of buckets) must be positive number", getName()); diff --git 
a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 55485d41ce0..94391606762 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -31,7 +31,7 @@ extern const int SUPPORT_IS_DISABLED; struct FunctionDetectLanguageImpl { - static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string) + static ALWAYS_INLINE std::string_view codeISO(std::string_view code_string) { if (code_string.ends_with("-Latn")) code_string.remove_suffix(code_string.size() - 5); diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 7e7ae76d6eb..2f5ce6deebf 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -170,7 +170,7 @@ public: : vec(in[in.size() - N]->getData()), next(in) {} /// Returns a combination of values in the i-th row of all columns stored in the constructor. - inline ResultValueType apply(const size_t i) const + ResultValueType apply(const size_t i) const { const auto a = !!vec[i]; return Op::apply(a, next.apply(i)); @@ -190,7 +190,7 @@ public: explicit AssociativeApplierImpl(const UInt8ColumnPtrs & in) : vec(in[in.size() - 1]->getData()) {} - inline ResultValueType apply(const size_t i) const { return !!vec[i]; } + ResultValueType apply(const size_t i) const { return !!vec[i]; } private: const UInt8Container & vec; @@ -291,7 +291,7 @@ public: } /// Returns a combination of values in the i-th row of all columns stored in the constructor. - inline ResultValueType apply(const size_t i) const + ResultValueType apply(const size_t i) const { return Op::ternaryApply(vec[i], next.apply(i)); } @@ -315,7 +315,7 @@ public: TernaryValueBuilder::build(in[in.size() - 1], vec.data()); } - inline ResultValueType apply(const size_t i) const { return vec[i]; } + ResultValueType apply(const size_t i) const { return vec[i]; } private: UInt8Container vec; diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 41464329f79..3c2eb3ee0b8 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -84,47 +84,47 @@ struct AndImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return true; } + static constexpr bool isSaturable() { return true; } /// Final value in two-valued logic (no further operations with True, False will change this value) - static inline constexpr bool isSaturatedValue(bool a) { return !a; } + static constexpr bool isSaturatedValue(bool a) { return !a; } /// Final value in three-valued logic (no further operations with True, False, Null will change this value) - static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::False; } + static constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::False; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); } /// Will use three-valued logic for NULLs (see above) or default implementation (any operation with NULL returns NULL). 
- static inline constexpr bool specialImplementationForNulls() { return true; } + static constexpr bool specialImplementationForNulls() { return true; } }; struct OrImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return true; } - static inline constexpr bool isSaturatedValue(bool a) { return a; } - static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); } - static inline constexpr bool specialImplementationForNulls() { return true; } + static constexpr bool isSaturable() { return true; } + static constexpr bool isSaturatedValue(bool a) { return a; } + static constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); } + static constexpr bool specialImplementationForNulls() { return true; } }; struct XorImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return false; } - static inline constexpr bool isSaturatedValue(bool) { return false; } - static inline constexpr bool isSaturatedValueTernary(UInt8) { return false; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; } - static inline constexpr bool specialImplementationForNulls() { return false; } + static constexpr bool isSaturable() { return false; } + static constexpr bool isSaturatedValue(bool) { return false; } + static constexpr bool isSaturatedValueTernary(UInt8) { return false; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; } + static constexpr bool specialImplementationForNulls() { return false; } #if USE_EMBEDDED_COMPILER - static inline llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a, llvm::Value * b) + static llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a, llvm::Value * b) { return builder.CreateXor(a, b); } @@ -136,13 +136,13 @@ struct NotImpl { using ResultType = UInt8; - static inline ResultType apply(A a) + static ResultType apply(A a) { return !static_cast(a); } #if USE_EMBEDDED_COMPILER - static inline llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a) + static llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a) { return builder.CreateNot(a); } diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index a93e1d9a87d..8e9eff50aab 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -21,7 +21,7 @@ namespace DB struct FunctionDetectProgrammingLanguageImpl { /// Calculate total weight - static ALWAYS_INLINE inline Float64 stateMachine( + static ALWAYS_INLINE Float64 stateMachine( const FrequencyHolder::Map & standard, const std::unordered_map & model) { diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 99f3a14dfec..1f20fbff24e 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -296,7 +296,7 @@ class FloatRoundingComputation : public BaseFloatRoundingComputation using Base = BaseFloatRoundingComputation; 
public: - static inline void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out) + static void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out) { auto val = Base::load(in); diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index 0bf6e39e651..cd33564caf9 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -99,7 +99,7 @@ struct Hash } template - static ALWAYS_INLINE inline UInt64 shingleHash(UInt64 crc, const UInt8 * start, size_t size) + static ALWAYS_INLINE UInt64 shingleHash(UInt64 crc, const UInt8 * start, size_t size) { if (size & 1) { @@ -153,7 +153,7 @@ struct Hash } template - static ALWAYS_INLINE inline UInt64 shingleHash(const std::vector & shingle, size_t offset = 0) + static ALWAYS_INLINE UInt64 shingleHash(const std::vector & shingle, size_t offset = 0) { UInt64 crc = -1ULL; @@ -177,14 +177,14 @@ struct SimHashImpl static constexpr size_t min_word_size = 4; /// Update fingerprint according to hash_value bits. - static ALWAYS_INLINE inline void updateFingerVector(Int64 * finger_vec, UInt64 hash_value) + static ALWAYS_INLINE void updateFingerVector(Int64 * finger_vec, UInt64 hash_value) { for (size_t i = 0; i < 64; ++i) finger_vec[i] += (hash_value & (1ULL << i)) ? 1 : -1; } /// Return a 64 bit value according to finger_vec. - static ALWAYS_INLINE inline UInt64 getSimHash(const Int64 * finger_vec) + static ALWAYS_INLINE UInt64 getSimHash(const Int64 * finger_vec) { UInt64 res = 0; @@ -200,7 +200,7 @@ struct SimHashImpl // for each ngram, calculate a 64 bit hash value, and update the vector according the hash value // finally return a 64 bit value(UInt64), i'th bit is 1 means vector[i] > 0, otherwise, vector[i] < 0 - static ALWAYS_INLINE inline UInt64 ngramHashASCII(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE UInt64 ngramHashASCII(const UInt8 * data, size_t size, size_t shingle_size) { if (size < shingle_size) return Hash::shingleHash(-1ULL, data, size); @@ -217,7 +217,7 @@ struct SimHashImpl return getSimHash(finger_vec); } - static ALWAYS_INLINE inline UInt64 ngramHashUTF8(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE UInt64 ngramHashUTF8(const UInt8 * data, size_t size, size_t shingle_size) { const UInt8 * start = data; const UInt8 * end = data + size; @@ -259,7 +259,7 @@ struct SimHashImpl // 2. 
next, we extract one word each time, and calculate a new hash value of the new word,then use the latest N hash // values to calculate the next word shingle hash value - static ALWAYS_INLINE inline UInt64 wordShingleHash(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE UInt64 wordShingleHash(const UInt8 * data, size_t size, size_t shingle_size) { const UInt8 * start = data; const UInt8 * end = data + size; @@ -400,7 +400,7 @@ struct MinHashImpl using MaxHeap = Heap>; using MinHeap = Heap>; - static ALWAYS_INLINE inline void ngramHashASCII( + static ALWAYS_INLINE void ngramHashASCII( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, @@ -429,7 +429,7 @@ struct MinHashImpl } } - static ALWAYS_INLINE inline void ngramHashUTF8( + static ALWAYS_INLINE void ngramHashUTF8( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, @@ -472,7 +472,7 @@ struct MinHashImpl // MinHash word shingle hash value calculate function: String ->Tuple(UInt64, UInt64) // for each word shingle, we calculate a hash value, but in fact, we just maintain the // K minimum and K maximum hash value - static ALWAYS_INLINE inline void wordShingleHash( + static ALWAYS_INLINE void wordShingleHash( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index aadf5c246fc..5224c76d7b0 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -85,7 +85,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline void unrollLowering(Container & cont, const std::index_sequence &) + static ALWAYS_INLINE void unrollLowering(Container & cont, const std::index_sequence &) { ((cont[Offset + I] = std::tolower(cont[Offset + I])), ...); } @@ -195,7 +195,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline size_t calculateNeedleStats( + static ALWAYS_INLINE size_t calculateNeedleStats( const char * data, const size_t size, NgramCount * ngram_stats, @@ -228,7 +228,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric( + static ALWAYS_INLINE UInt64 calculateHaystackStatsAndMetric( const char * data, const size_t size, NgramCount * ngram_stats, @@ -275,7 +275,7 @@ struct NgramDistanceImpl } template - static inline auto dispatchSearcher(Callback callback, Args &&... args) + static auto dispatchSearcher(Callback callback, Args &&... 
args) { if constexpr (!UTF8) return callback(std::forward(args)..., readASCIICodePoints, calculateASCIIHash); diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 6183d25c8bd..7522bd374a2 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -97,7 +97,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline auto execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) \ + static auto execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) \ { \ return time_zone.add##INTERVAL_KIND##s(ExtendedDayNum(d), delta); \ } \ @@ -110,7 +110,7 @@ template<> \ template <> struct AddTime { - static inline NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &) + static NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &) { return ExtendedDayNum(static_cast(d + delta * 7)); } @@ -120,7 +120,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) \ + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) \ { return static_cast(t + delta * (INTERVAL)); } \ }; ADD_TIME(Day, 86400) @@ -133,7 +133,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ + static NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ { \ if (scale < (DEF_SCALE)) \ { \ diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index 3de38d99c88..a8cc09186f6 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -18,7 +18,7 @@ namespace DB */ struct FunctionDetectTonalityImpl { - static ALWAYS_INLINE inline Float32 detectTonality( + static ALWAYS_INLINE Float32 detectTonality( const UInt8 * str, const size_t str_len, const FrequencyHolder::Map & emotional_dict) diff --git a/src/Functions/GCDLCMImpl.h b/src/Functions/GCDLCMImpl.h index df531363c31..094c248497b 100644 --- a/src/Functions/GCDLCMImpl.h +++ b/src/Functions/GCDLCMImpl.h @@ -26,7 +26,7 @@ struct GCDLCMImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger::Type(a), typename NumberTraits::ToInteger::Type(b)); throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger::Type(b), typename NumberTraits::ToInteger::Type(a)); diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index eb7ef4abe56..91861e8bbd2 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -20,12 +20,12 @@ namespace ErrorCodes namespace { - inline constexpr bool is_leap_year(int32_t year) + constexpr bool is_leap_year(int32_t year) { return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); } - inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) + constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) { switch (month) { @@ -49,7 +49,7 @@ namespace /** Integer division truncated toward negative infinity. */ template - inline constexpr I div(I x, J y) + constexpr I div(I x, J y) { const auto y_cast = static_cast(y); if (x > 0 && y_cast < 0) @@ -63,7 +63,7 @@ namespace /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. 
*/ template - inline constexpr I mod(I x, J y) + constexpr I mod(I x, J y) { const auto y_cast = static_cast(y); const auto r = x % y_cast; @@ -76,7 +76,7 @@ namespace /** Like std::min(), but the type of operands may differ. */ template - inline constexpr I min(I x, J y) + constexpr I min(I x, J y) { const auto y_cast = static_cast(y); return x < y_cast ? x : y_cast; diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index c4851718da6..0c57fd7f0b5 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -124,7 +124,7 @@ public: bool hasEmptyBound() const { return has_empty_bound; } - inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const + bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const { Point point(x, y); diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index 896e9d8ca48..b52ccd3cce0 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -53,7 +53,7 @@ public: {} template - inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const + auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const { /// Type conversion from float to integer may be required. /// We are Ok with implementation specific result for out of range and denormals conversion. @@ -90,14 +90,14 @@ public: template requires(!std::same_as) - inline auto execute(const T & t, Args &&... args) const + auto execute(const T & t, Args &&... args) const { return wrapped_transform.execute(t, std::forward(args)...); } template - inline auto NO_SANITIZE_UNDEFINED executeExtendedResult(const DateTime64 & t, Args && ... args) const + auto NO_SANITIZE_UNDEFINED executeExtendedResult(const DateTime64 & t, Args && ... args) const { /// Type conversion from float to integer may be required. /// We are Ok with implementation specific result for out of range and denormals conversion. @@ -131,7 +131,7 @@ public: template requires (!std::same_as) - inline auto executeExtendedResult(const T & t, Args && ... args) const + auto executeExtendedResult(const T & t, Args && ... args) const { return wrapped_transform.executeExtendedResult(t, std::forward(args)...); } diff --git a/src/Functions/abs.cpp b/src/Functions/abs.cpp index 0cd313caf1e..9ac2363f765 100644 --- a/src/Functions/abs.cpp +++ b/src/Functions/abs.cpp @@ -12,7 +12,7 @@ struct AbsImpl using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfAbs::Type>; static constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (is_decimal) return a < A(0) ? 
A(-a) : a; diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 395f96bbffb..fa9b3dc92dd 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -322,7 +322,7 @@ private: } template - static inline void invokeCheckNullMaps( + static void invokeCheckNullMaps( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & str_offsets, const ColumnString::Chars & values, OffsetT item_offsets, @@ -339,7 +339,7 @@ private: } public: - static inline void process( + static void process( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets, const ColumnString::Chars & item_values, Offset item_offsets, PaddedPODArray & result, @@ -348,7 +348,7 @@ public: invokeCheckNullMaps(data, offsets, string_offsets, item_values, item_offsets, result, data_map, item_map); } - static inline void process( + static void process( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets, const ColumnString::Chars & item_values, const ColumnString::Offsets & item_offsets, PaddedPODArray & result, @@ -467,10 +467,10 @@ private: NullMaps maps; ResultColumnPtr result { ResultColumnType::create() }; - inline void moveResult() { result_column = std::move(result); } + void moveResult() { result_column = std::move(result); } }; - static inline bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) + static bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) { auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type)); auto arg_decayed = removeNullable(removeLowCardinality(arg)); @@ -633,7 +633,7 @@ private: * (s1, s1, s2, ...), (s2, s1, s2, ...), (s3, s1, s2, ...) */ template - static inline ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) + static ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); @@ -658,14 +658,14 @@ private: } template - static inline bool executeIntegral(ExecutionData& data) + static bool executeIntegral(ExecutionData& data) { return (executeIntegralExpanded(data) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... 
template - static inline bool executeIntegralExpanded(ExecutionData& data) + static bool executeIntegralExpanded(ExecutionData& data) { return (executeIntegralImpl(data) || ...); } diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index e87eff6add1..ca1e8f21aee 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -25,19 +25,19 @@ struct L1Norm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + fabs(value); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -50,19 +50,19 @@ struct L2Norm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + value * value; } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return sqrt(result); } @@ -73,7 +73,7 @@ struct L2SquaredNorm : L2Norm static constexpr auto name = "L2Squared"; template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -91,19 +91,19 @@ struct LpNorm }; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams & params) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams & params) { return result + static_cast(std::pow(fabs(value), params.power)); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams & params) + static ResultType finalize(ResultType result, const ConstParams & params) { return static_cast(std::pow(result, params.inverted_power)); } @@ -116,19 +116,19 @@ struct LinfNorm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return fmax(result, fabs(value)); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return fmax(result, other_result); } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } diff --git 
a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index 8efc5181919..c6ab9023142 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -20,7 +20,7 @@ struct BitAndImpl static constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) & static_cast(b); } @@ -28,7 +28,7 @@ struct BitAndImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitAndImpl expected an integral type"); diff --git a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 11c0c1d1b7d..bd89b6eb69a 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -25,7 +25,7 @@ struct BitBoolMaskAndImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) + static Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) { // Should be a logical error, but this function is callable from SQL. // Need to investigate this. diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index 7940bf3e2ca..1ddf2d258f8 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -25,7 +25,7 @@ struct BitBoolMaskOrImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) + static Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) { if constexpr (!std::is_same_v || !std::is_same_v) // Should be a logical error, but this function is callable from SQL. diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index f1a3ac897c1..68555b1386c 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -13,7 +13,7 @@ struct BitCountImpl using ResultType = std::conditional_t<(sizeof(A) * 8 >= 256), UInt16, UInt8>; static constexpr bool allow_string_or_fixed_string = true; - static inline ResultType apply(A a) + static ResultType apply(A a) { /// We count bits in the value representation in memory. For example, we support floats. /// We need to avoid sign-extension when converting signed numbers to larger type. So, uint8_t(-1) has 8 bits. diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index f00f38b61af..f8a1a95ae14 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -19,7 +19,7 @@ struct BitHammingDistanceImpl static constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { /// Note: it's unspecified if signed integers should be promoted with sign-extension or with zero-fill. /// This behavior can change in the future. 
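// The rule this whole series relies on, sketched here for reference: a member
// function defined inside its class definition is implicitly inline in C++
// ([class.mfct]), so the explicit `inline` keyword removed in these hunks was
// redundant. A minimal self-contained illustration with hypothetical names
// (not taken from this patch):
struct ImplicitInlineSketch
{
    static int apply(int a, int b) { return a & b; }            // implicitly inline: defined in-class
    static inline int applyOld(int a, int b) { return a & b; }  // identical meaning; keyword adds nothing
};
// Either form may live in a header included from many translation units without
// violating the one-definition rule, and the generated code is unchanged.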
diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index 62ebdc7c52a..44dc77bb7bb 100644 --- a/src/Functions/bitNot.cpp +++ b/src/Functions/bitNot.cpp @@ -19,7 +19,7 @@ struct BitNotImpl using ResultType = typename NumberTraits::ResultOfBitNot::Type; static constexpr bool allow_string_or_fixed_string = true; - static inline ResultType NO_SANITIZE_UNDEFINED apply(A a) + static ResultType NO_SANITIZE_UNDEFINED apply(A a) { return ~static_cast(a); } @@ -27,7 +27,7 @@ struct BitNotImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitNotImpl expected an integral type"); diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index 9e19fc55219..22ce15d892d 100644 --- a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -19,7 +19,7 @@ struct BitOrImpl static constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) | static_cast(b); } @@ -27,7 +27,7 @@ struct BitOrImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitOrImpl expected an integral type"); diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp index c72466b8d49..2fe2c4e0f1d 100644 --- a/src/Functions/bitRotateLeft.cpp +++ b/src/Functions/bitRotateLeft.cpp @@ -20,7 +20,7 @@ struct BitRotateLeftImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Bit rotate is not implemented for big integers"); @@ -32,7 +32,7 @@ struct BitRotateLeftImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitRotateLeftImpl expected an integral type"); diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp index 045758f9a31..a2f0fe12324 100644 --- a/src/Functions/bitRotateRight.cpp +++ b/src/Functions/bitRotateRight.cpp @@ -20,7 +20,7 @@ struct BitRotateRightImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Bit rotate is not implemented for big integers"); @@ -32,7 +32,7 @@ struct BitRotateRightImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value 
* left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitRotateRightImpl expected an integral type"); diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 7b3748edb5c..c366a1ecb44 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -20,7 +20,7 @@ struct BitShiftLeftImpl static const constexpr bool allow_string_integer = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); @@ -145,7 +145,7 @@ struct BitShiftLeftImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitShiftLeftImpl expected an integral type"); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 21a0f7584aa..1c37cd3bf4c 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -21,7 +21,7 @@ struct BitShiftRightImpl static const constexpr bool allow_string_integer = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); @@ -31,7 +31,7 @@ struct BitShiftRightImpl return static_cast(a) >> static_cast(b); } - static inline NO_SANITIZE_UNDEFINED void bitShiftRightForBytes(const UInt8 * op_pointer, const UInt8 * begin, UInt8 * out, const size_t shift_right_bits) + static NO_SANITIZE_UNDEFINED void bitShiftRightForBytes(const UInt8 * op_pointer, const UInt8 * begin, UInt8 * out, const size_t shift_right_bits) { while (op_pointer > begin) { @@ -123,7 +123,7 @@ struct BitShiftRightImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitShiftRightImpl expected an integral type"); diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index d8957598c62..4ff436d5708 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -21,7 +21,7 @@ struct BitSwapLastTwoImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) + static ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) { if constexpr (!std::is_same_v) // Should be a logical error, but this function is callable from SQL. 
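// Attributes such as NO_SANITIZE_UNDEFINED above (and ALWAYS_INLINE in the
// Port.h hunks further down) are orthogonal to the `inline` keyword being
// removed: the keyword only concerns linkage and the one-definition rule,
// while the attributes instruct the compiler. A sketch under the assumption
// that the macros expand to the conventional GCC/Clang attributes (the
// SKETCH_* names are illustrative, not from this patch):
#define SKETCH_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined")))
#define SKETCH_ALWAYS_INLINE __attribute__((always_inline))
struct AttributeSketch
{
    // Implicitly inline either way; removing `inline` leaves the attribute intact.
    static SKETCH_NO_SANITIZE_UNDEFINED int negate(int a) { return -a; }
    void SKETCH_ALWAYS_INLINE update() { ++version; }
    unsigned long version = 0;
};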
@@ -35,7 +35,7 @@ struct BitSwapLastTwoImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; -static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) +static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "__bitSwapLastTwo expected an integral type"); diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 4c9c6aa2dfb..78ec9c8b773 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -21,7 +21,7 @@ struct BitTestImpl static const constexpr bool allow_string_integer = false; template - NO_SANITIZE_UNDEFINED static inline Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); diff --git a/src/Functions/bitTestAll.cpp b/src/Functions/bitTestAll.cpp index a2dcef3eb96..92f63bfa262 100644 --- a/src/Functions/bitTestAll.cpp +++ b/src/Functions/bitTestAll.cpp @@ -9,7 +9,7 @@ namespace struct BitTestAllImpl { template - static inline UInt8 apply(A a, B b) { return (a & b) == b; } + static UInt8 apply(A a, B b) { return (a & b) == b; } }; struct NameBitTestAll { static constexpr auto name = "bitTestAll"; }; diff --git a/src/Functions/bitTestAny.cpp b/src/Functions/bitTestAny.cpp index 6b20d6c184c..c8f445d524e 100644 --- a/src/Functions/bitTestAny.cpp +++ b/src/Functions/bitTestAny.cpp @@ -9,7 +9,7 @@ namespace struct BitTestAnyImpl { template - static inline UInt8 apply(A a, B b) { return (a & b) != 0; } + static UInt8 apply(A a, B b) { return (a & b) != 0; } }; struct NameBitTestAny { static constexpr auto name = "bitTestAny"; }; diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index 99c06172c30..d243a6724a8 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -21,7 +21,7 @@ struct BitWrapperFuncImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) + static ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) { // Should be a logical error, but this function is callable from SQL. // Need to investigate this. 
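// The same redundancy argument covers the `static inline constexpr` data
// members changed later in this patch (ZstdDeflatingAppendableWriteBuffer.h,
// IMergeTreeDataPart.h): since C++17 a constexpr static data member is an
// implicitly inline variable. A minimal sketch, hypothetical names only:
struct ConstexprMemberSketch
{
    static constexpr auto FILE_NAME = "default.txt";             // implicitly inline since C++17
    static inline constexpr auto FILE_NAME_OLD = "default.txt";  // equivalent; `inline` is redundant
};
// Both may be odr-used from multiple translation units without an out-of-class
// definition, so dropping the keyword is purely cosmetic.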
diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp index 78c4c64d06e..43004c6f676 100644 --- a/src/Functions/bitXor.cpp +++ b/src/Functions/bitXor.cpp @@ -19,7 +19,7 @@ struct BitXorImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) ^ static_cast(b); } @@ -27,7 +27,7 @@ struct BitXorImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitXorImpl expected an integral type"); diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 4d7a4f0b53d..c06dfe15dc4 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -214,7 +214,7 @@ private: template struct QuarterWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToQuarterImpl::execute(source, timezone), buffer); } @@ -223,7 +223,7 @@ private: template struct MonthWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { const auto month = ToMonthImpl::execute(source, timezone); static constexpr std::string_view month_names[] = @@ -249,7 +249,7 @@ private: template struct WeekWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToISOWeekImpl::execute(source, timezone), buffer); } @@ -258,7 +258,7 @@ private: template struct DayOfYearWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToDayOfYearImpl::execute(source, timezone), buffer); } @@ -267,7 +267,7 @@ private: template struct DayWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToDayOfMonthImpl::execute(source, timezone), buffer); } @@ -276,7 +276,7 @@ private: template struct WeekDayWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { const auto day = ToDayOfWeekImpl::execute(source, 0, timezone); static constexpr std::string_view day_names[] = @@ -297,7 +297,7 @@ private: template struct HourWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToHourImpl::execute(source, timezone), buffer); } @@ -306,7 +306,7 @@ private: template struct MinuteWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToMinuteImpl::execute(source, timezone), buffer); } @@ -315,7 +315,7 @@ private: template struct 
SecondWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToSecondImpl::execute(source, timezone), buffer); } diff --git a/src/Functions/divide.cpp b/src/Functions/divide.cpp index ca552256cd1..7c67245c382 100644 --- a/src/Functions/divide.cpp +++ b/src/Functions/divide.cpp @@ -16,7 +16,7 @@ struct DivideFloatingImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { return static_cast(a) / b; } @@ -24,7 +24,7 @@ struct DivideFloatingImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DivideFloatingImpl expected a floating-point type"); diff --git a/src/Functions/divideDecimal.cpp b/src/Functions/divideDecimal.cpp index 1d0db232062..c8d2c5edc8a 100644 --- a/src/Functions/divideDecimal.cpp +++ b/src/Functions/divideDecimal.cpp @@ -18,7 +18,7 @@ struct DivideDecimalsImpl static constexpr auto name = "divideDecimal"; template - static inline Decimal256 + static Decimal256 execute(FirstType a, SecondType b, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) { if (b.value == 0) diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index b814e8198e6..7ff9126c004 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -19,7 +19,7 @@ struct FactorialImpl static const constexpr bool allow_decimal = false; static const constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (std::is_floating_point_v || is_over_big_int) throw Exception( diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 1c12317f510..1bd71f19f76 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -94,13 +94,13 @@ struct Impl } } - static inline NO_SANITIZE_UNDEFINED size_t toIndex(T x) + static NO_SANITIZE_UNDEFINED size_t toIndex(T x) { /// Implementation specific behaviour on overflow or infinite value. 
return static_cast(x); } - static inline T degDiff(T f) + static T degDiff(T f) { f = std::abs(f); if (f > 180) @@ -108,7 +108,7 @@ struct Impl return f; } - inline T fastCos(T x) + T fastCos(T x) { T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); size_t i = toIndex(y); @@ -117,7 +117,7 @@ struct Impl return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; } - inline T fastSin(T x) + T fastSin(T x) { T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); size_t i = toIndex(y); @@ -128,7 +128,7 @@ struct Impl /// fast implementation of asin(sqrt(x)) /// max error in floats 0.00369%, in doubles 0.00072% - inline T fastAsinSqrt(T x) + T fastAsinSqrt(T x) { if (x < T(0.122)) { diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 93fd7e24853..87a48c887b4 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -15,7 +15,7 @@ struct GreatestBaseImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) > static_cast(b) ? static_cast(a) : static_cast(b); @@ -24,7 +24,7 @@ struct GreatestBaseImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) { @@ -46,7 +46,7 @@ struct GreatestSpecialImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { static_assert(std::is_same_v, "ResultType != Result"); return accurate::greaterOp(a, b) ? static_cast(a) : static_cast(b); diff --git a/src/Functions/h3GetUnidirectionalEdge.cpp b/src/Functions/h3GetUnidirectionalEdge.cpp index 4e41cdbfef6..9e253e87104 100644 --- a/src/Functions/h3GetUnidirectionalEdge.cpp +++ b/src/Functions/h3GetUnidirectionalEdge.cpp @@ -108,7 +108,7 @@ public: /// suppress asan errors generated by the following: /// 'NEW_ADJUSTMENT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:142:24 /// 'NEW_DIGIT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:121:24 - __attribute__((no_sanitize_address)) static inline UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) + __attribute__((no_sanitize_address)) static UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) { const UInt64 res = cellsToDirectedEdge(origin, dest); return res; diff --git a/src/Functions/initialQueryID.cpp b/src/Functions/initialQueryID.cpp index 469f37cf614..9c9390d4e50 100644 --- a/src/Functions/initialQueryID.cpp +++ b/src/Functions/initialQueryID.cpp @@ -19,16 +19,16 @@ public: explicit FunctionInitialQueryID(const String & initial_query_id_) : initial_query_id(initial_query_id_) {} - inline String getName() const override { return name; } + String getName() const override { return name; } - inline size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } - inline bool isDeterministic() const override { return false; } + bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/intDiv.cpp 
b/src/Functions/intDiv.cpp index 38939556fa5..6b5bb00eacd 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -80,7 +80,7 @@ struct DivideIntegralByConstantImpl private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); diff --git a/src/Functions/intDivOrZero.cpp b/src/Functions/intDivOrZero.cpp index 96ff6ea80fc..f32eac17127 100644 --- a/src/Functions/intDivOrZero.cpp +++ b/src/Functions/intDivOrZero.cpp @@ -13,7 +13,7 @@ struct DivideIntegralOrZeroImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if (unlikely(divisionLeadsToFPE(a, b))) return 0; diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 6944c4701bc..733f9d55702 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -19,7 +19,7 @@ struct IntExp10Impl using ResultType = UInt64; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType apply([[maybe_unused]] A a) + static ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v || std::is_same_v) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "IntExp10 is not implemented for big integers"); diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index 4e5cc60a731..7e016a0dbd2 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -20,7 +20,7 @@ struct IntExp2Impl using ResultType = UInt64; static constexpr bool allow_string_or_fixed_string = false; - static inline ResultType apply([[maybe_unused]] A a) + static ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "intExp2 not implemented for big integers"); @@ -31,7 +31,7 @@ struct IntExp2Impl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "IntExp2Impl expected an integral type"); diff --git a/src/Functions/isValidUTF8.cpp b/src/Functions/isValidUTF8.cpp index e7aba672356..d5f5e6a8986 100644 --- a/src/Functions/isValidUTF8.cpp +++ b/src/Functions/isValidUTF8.cpp @@ -65,9 +65,9 @@ SOFTWARE. 
*/ #ifndef __SSE4_1__ - static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return DB::UTF8::isValidUTF8(data, len); } + static UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return DB::UTF8::isValidUTF8(data, len); } #else - static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) + static UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { /* * Map high nibble of "First Byte" to legal character length minus 1 diff --git a/src/Functions/jumpConsistentHash.cpp b/src/Functions/jumpConsistentHash.cpp index ffc21eb5cea..fbac5d4fdd5 100644 --- a/src/Functions/jumpConsistentHash.cpp +++ b/src/Functions/jumpConsistentHash.cpp @@ -29,7 +29,7 @@ struct JumpConsistentHashImpl using BucketsType = ResultType; static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); - static inline ResultType apply(UInt64 hash, BucketsType n) + static ResultType apply(UInt64 hash, BucketsType n) { return JumpConsistentHash(hash, n); } diff --git a/src/Functions/kostikConsistentHash.cpp b/src/Functions/kostikConsistentHash.cpp index 47a9a928976..42004ed40d9 100644 --- a/src/Functions/kostikConsistentHash.cpp +++ b/src/Functions/kostikConsistentHash.cpp @@ -17,7 +17,7 @@ struct KostikConsistentHashImpl using BucketsType = ResultType; static constexpr auto max_buckets = 32768; - static inline ResultType apply(UInt64 hash, BucketsType n) + static ResultType apply(UInt64 hash, BucketsType n) { return ConsistentHashing(hash, n); } diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index f5680d4d468..babb8378d80 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -15,7 +15,7 @@ struct LeastBaseImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { /** gcc 4.9.2 successfully vectorizes a loop from this function. */ return static_cast(a) < static_cast(b) ? static_cast(a) : static_cast(b); @@ -24,7 +24,7 @@ struct LeastBaseImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) { @@ -46,7 +46,7 @@ struct LeastSpecialImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { static_assert(std::is_same_v, "ResultType != Result"); return accurate::lessOp(a, b) ? static_cast(a) : static_cast(b); diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index 04877a42b18..f3b9b8a7bcb 100644 --- a/src/Functions/minus.cpp +++ b/src/Functions/minus.cpp @@ -13,7 +13,7 @@ struct MinusImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { if constexpr (is_big_int_v || is_big_int_v) { @@ -28,7 +28,7 @@ struct MinusImpl /// Apply operation and check overflow. It's used for Decimal operations. @returns true if overflowed, false otherwise.
template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { return common::subOverflow(static_cast(a), b, c); } @@ -36,7 +36,7 @@ struct MinusImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? b.CreateSub(left, right) : b.CreateFSub(left, right); } diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index cbc2ec2cd0a..ebc1c4f5275 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -105,7 +105,7 @@ struct ModuloByConstantImpl private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); diff --git a/src/Functions/moduloOrZero.cpp b/src/Functions/moduloOrZero.cpp index 3551ae74c5f..cd7873b3b9e 100644 --- a/src/Functions/moduloOrZero.cpp +++ b/src/Functions/moduloOrZero.cpp @@ -15,7 +15,7 @@ struct ModuloOrZeroImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if constexpr (std::is_floating_point_v) { diff --git a/src/Functions/multiply.cpp b/src/Functions/multiply.cpp index 4dc8cd10f31..67b6fff6b58 100644 --- a/src/Functions/multiply.cpp +++ b/src/Functions/multiply.cpp @@ -14,7 +14,7 @@ struct MultiplyImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { if constexpr (is_big_int_v || is_big_int_v) { @@ -29,7 +29,7 @@ struct MultiplyImpl /// Apply operation and check overflow. It's used for Decimal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { if constexpr (std::is_same_v || std::is_same_v) { @@ -43,7 +43,7 @@ struct MultiplyImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? 
b.CreateMul(left, right) : b.CreateFMul(left, right); } diff --git a/src/Functions/multiplyDecimal.cpp b/src/Functions/multiplyDecimal.cpp index ed6487c6683..7e30a893d72 100644 --- a/src/Functions/multiplyDecimal.cpp +++ b/src/Functions/multiplyDecimal.cpp @@ -17,7 +17,7 @@ struct MultiplyDecimalsImpl static constexpr auto name = "multiplyDecimal"; template - static inline Decimal256 + static Decimal256 execute(FirstType a, SecondType b, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) { if (a.value == 0 || b.value == 0) diff --git a/src/Functions/negate.cpp b/src/Functions/negate.cpp index bd47780dea8..2c9b461274d 100644 --- a/src/Functions/negate.cpp +++ b/src/Functions/negate.cpp @@ -11,7 +11,7 @@ struct NegateImpl using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfNegate::Type>; static constexpr const bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { return -static_cast(a); } @@ -19,7 +19,7 @@ struct NegateImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { return arg->getType()->isIntegerTy() ? b.CreateNeg(arg) : b.CreateFNeg(arg); } diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index cd9cf6cec5c..ffb0fe2ade7 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -14,7 +14,7 @@ struct PlusImpl static const constexpr bool is_commutative = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { /// Next everywhere, static_cast - so that there is no wrong result in expressions of the form Int64 c = UInt32(a) * Int32(-1). if constexpr (is_big_int_v || is_big_int_v) @@ -30,7 +30,7 @@ struct PlusImpl /// Apply operation and check overflow. It's used for Decimal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { return common::addOverflow(static_cast(a), b, c); } @@ -38,7 +38,7 @@ struct PlusImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ?
b.CreateAdd(left, right) : b.CreateFAdd(left, right); } diff --git a/src/Functions/queryID.cpp b/src/Functions/queryID.cpp index 704206e1de5..5d0ac719797 100644 --- a/src/Functions/queryID.cpp +++ b/src/Functions/queryID.cpp @@ -19,16 +19,16 @@ public: explicit FunctionQueryID(const String & query_id_) : query_id(query_id_) {} - inline String getName() const override { return name; } + String getName() const override { return name; } - inline size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } - inline bool isDeterministic() const override { return false; } + bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 84597f4eadc..7f2fe646062 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -22,14 +22,14 @@ namespace struct RepeatImpl { /// Safety threshold against DoS. - static inline void checkRepeatTime(UInt64 repeat_time) + static void checkRepeatTime(UInt64 repeat_time) { static constexpr UInt64 max_repeat_times = 1'000'000; if (repeat_time > max_repeat_times) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too many times to repeat ({}), maximum is: {}", repeat_time, max_repeat_times); } - static inline void checkStringSize(UInt64 size) + static void checkStringSize(UInt64 size) { static constexpr UInt64 max_string_size = 1 << 30; if (size > max_string_size) diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp index cca92c19b0c..38eda9f3383 100644 --- a/src/Functions/roundAge.cpp +++ b/src/Functions/roundAge.cpp @@ -12,7 +12,7 @@ struct RoundAgeImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType apply(A x) + static ResultType apply(A x) { return x < 1 ? 0 : (x < 18 ? 17 diff --git a/src/Functions/roundDuration.cpp b/src/Functions/roundDuration.cpp index 918f0b3425d..963080ba0d2 100644 --- a/src/Functions/roundDuration.cpp +++ b/src/Functions/roundDuration.cpp @@ -12,7 +12,7 @@ struct RoundDurationImpl using ResultType = UInt16; static constexpr bool allow_string_or_fixed_string = false; - static inline ResultType apply(A x) + static ResultType apply(A x) { return x < 1 ? 0 : (x < 10 ? 1 diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index 607c67b742e..eb0df8884c5 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -65,7 +65,7 @@ struct RoundToExp2Impl using ResultType = T; static constexpr const bool allow_string_or_fixed_string = false; - static inline T apply(T x) + static T apply(T x) { return roundDownToPowerOfTwo(x); } diff --git a/src/Functions/sign.cpp b/src/Functions/sign.cpp index 6c849760eed..3dd2ac8e3aa 100644 --- a/src/Functions/sign.cpp +++ b/src/Functions/sign.cpp @@ -11,7 +11,7 @@ struct SignImpl using ResultType = Int8; static constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (is_decimal || std::is_floating_point_v) return a < A(0) ? -1 : a == A(0) ? 
0 : 1; diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 4cfa629aa33..83183c991bc 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -27,7 +27,7 @@ private: static constexpr auto space = ' '; /// Safety threshold against DoS. - static inline void checkRepeatTime(size_t repeat_time) + static void checkRepeatTime(size_t repeat_time) { static constexpr auto max_repeat_times = 1'000'000uz; if (repeat_time > max_repeat_times) diff --git a/src/Functions/tokenExtractors.cpp b/src/Functions/tokenExtractors.cpp index a29d759d2ca..e7dcb5cced3 100644 --- a/src/Functions/tokenExtractors.cpp +++ b/src/Functions/tokenExtractors.cpp @@ -116,7 +116,7 @@ public: private: template - inline void executeImpl( + void executeImpl( const ExtractorType & extractor, StringColumnType & input_data_column, ResultStringColumnType & result_data_column, diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h index e98f00270e2..62fe011c0b6 100644 --- a/src/IO/BufferBase.h +++ b/src/IO/BufferBase.h @@ -37,13 +37,13 @@ public: { Buffer(Position begin_pos_, Position end_pos_) : begin_pos(begin_pos_), end_pos(end_pos_) {} - inline Position begin() const { return begin_pos; } - inline Position end() const { return end_pos; } - inline size_t size() const { return size_t(end_pos - begin_pos); } - inline void resize(size_t size) { end_pos = begin_pos + size; } - inline bool empty() const { return size() == 0; } + Position begin() const { return begin_pos; } + Position end() const { return end_pos; } + size_t size() const { return size_t(end_pos - begin_pos); } + void resize(size_t size) { end_pos = begin_pos + size; } + bool empty() const { return size() == 0; } - inline void swap(Buffer & other) noexcept + void swap(Buffer & other) noexcept { std::swap(begin_pos, other.begin_pos); std::swap(end_pos, other.end_pos); @@ -71,21 +71,21 @@ public: } /// get buffer - inline Buffer & internalBuffer() { return internal_buffer; } + Buffer & internalBuffer() { return internal_buffer; } /// get the part of the buffer from which you can read / write data - inline Buffer & buffer() { return working_buffer; } + Buffer & buffer() { return working_buffer; } /// get (for reading and modifying) the position in the buffer - inline Position & position() { return pos; } + Position & position() { return pos; } /// offset in bytes of the cursor from the beginning of the buffer - inline size_t offset() const { return size_t(pos - working_buffer.begin()); } + size_t offset() const { return size_t(pos - working_buffer.begin()); } /// How many bytes are available for read/write - inline size_t available() const { return size_t(working_buffer.end() - pos); } + size_t available() const { return size_t(working_buffer.end() - pos); } - inline void swap(BufferBase & other) noexcept + void swap(BufferBase & other) noexcept { internal_buffer.swap(other.internal_buffer); working_buffer.swap(other.working_buffer); diff --git a/src/IO/HTTPHeaderEntries.h b/src/IO/HTTPHeaderEntries.h index 5862f1ead15..36b2ccc4ba5 100644 --- a/src/IO/HTTPHeaderEntries.h +++ b/src/IO/HTTPHeaderEntries.h @@ -10,7 +10,7 @@ struct HTTPHeaderEntry std::string value; HTTPHeaderEntry(const std::string & name_, const std::string & value_) : name(name_), value(value_) {} - inline bool operator==(const HTTPHeaderEntry & other) const { return name == other.name && value == other.value; } + bool operator==(const HTTPHeaderEntry & other) const { return name == other.name && value == other.value; } }; using HTTPHeaderEntries = std::vector; diff --git 
a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index 73e52f2c503..eba614d9d0a 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -37,7 +37,7 @@ public: Status readBlock(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out); - inline void reset() + void reset() { buffer_length = 0; block_length = -1; @@ -73,7 +73,7 @@ class HadoopSnappyReadBuffer : public CompressedReadBufferWrapper public: using Status = HadoopSnappyDecoder::Status; - inline static String statusToString(Status status) + static String statusToString(Status status) { switch (status) { diff --git a/src/IO/IReadableWriteBuffer.h b/src/IO/IReadableWriteBuffer.h index dda5fc07c8e..db379fef969 100644 --- a/src/IO/IReadableWriteBuffer.h +++ b/src/IO/IReadableWriteBuffer.h @@ -8,7 +8,7 @@ namespace DB struct IReadableWriteBuffer { /// Returns getReadBufferImpl() on the first call; subsequent calls return nullptr. - inline std::unique_ptr tryGetReadBuffer() + std::unique_ptr tryGetReadBuffer() { if (!can_reread) return nullptr; diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 2ee209ffd6c..e831956956f 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -83,9 +83,9 @@ private: bool peekNext(); - inline bool useSubbufferOnly() const { return !peeked_size; } - inline bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } - inline bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } + bool useSubbufferOnly() const { return !peeked_size; } + bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } + bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } void checkStateCorrect() const; diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 056e25a5fbe..73f5335411f 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -85,7 +85,7 @@ public: } - inline void nextIfAtEnd() + void nextIfAtEnd() { if (!hasPendingData()) next(); diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index 424cf65caf2..3b03356a8fb 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -169,7 +169,7 @@ using DeleteObjectsRequest = ExtendedRequest; class ComposeObjectRequest : public ExtendedRequest { public: - inline const char * GetServiceRequestName() const override { return "ComposeObject"; } + const char * GetServiceRequestName() const override { return "ComposeObject"; } AWS_S3_API Aws::String SerializePayload() const override; diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 1ceb938e454..ef4e0058ec3 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -41,7 +41,7 @@ public: * If direct write is performed into [position(), buffer().end()) and its length is not enough, * you need to fill it first (e.g. with a write call); after that the capacity is regained. */ - inline void next() + void next() { if (!offset()) return; @@ -69,7 +69,7 @@ public: /// Calling finalize() in the destructor of derived classes is a bad practice.
virtual ~WriteBuffer(); - inline void nextIfAtEnd() + void nextIfAtEnd() { if (!hasPendingData()) next(); @@ -96,7 +96,7 @@ public: } } - inline void write(char x) + void write(char x) { if (finalized) throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized buffer"}; diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.h b/src/IO/ZstdDeflatingAppendableWriteBuffer.h index d9c4f32d6da..34cdf03df25 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.h +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.h @@ -27,7 +27,7 @@ class ZstdDeflatingAppendableWriteBuffer : public BufferWithOwnMemory; /// Frame end block. If we read non-empty file and see no such flag we should add it. - static inline constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00}; + static constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00}; ZstdDeflatingAppendableWriteBuffer( std::unique_ptr out_, diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 5a8a5bfb184..0b0460b26c8 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -133,10 +133,10 @@ struct DDLTaskBase virtual void createSyncedNodeIfNeed(const ZooKeeperPtr & /*zookeeper*/) {} - inline String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; } - inline String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; } - inline String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); } - inline String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; } + String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; } + String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; } + String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); } + String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; } static String getLogEntryName(UInt32 log_entry_number); static UInt32 getLogEntryNumber(const String & log_entry_name); diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 5caa034e0e9..37125d9900c 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -284,7 +284,7 @@ private: static constexpr UInt64 bits_for_first_level = 4; using UUIDToStorageMap = std::array; - static inline size_t getFirstLevelIdx(const UUID & uuid) + static size_t getFirstLevelIdx(const UUID & uuid) { return UUIDHelpers::getHighBytes(uuid) >> (64 - bits_for_first_level); } diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 046d0b4fc10..21c773ee1d7 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -119,9 +119,9 @@ public: return result; } - inline size_t getAllocatedSize() const { return allocated_size; } + size_t getAllocatedSize() const { return allocated_size; } - inline size_t getPageSize() const { return page_size; } + size_t getPageSize() const { return page_size; } ~PageArena() { @@ -177,10 +177,10 @@ private: { } - inline void * base() const { return pages_base; } - inline size_t pagesSize() const { return pages_size; } - inline size_t pageSize() const { return page_size; } - inline size_t blockSize() const { return pages_size * page_size; } + void * base() const { return pages_base; } + size_t pagesSize() const { return pages_size; } + size_t pageSize() const { return page_size; } + size_t 
blockSize() const { return pages_size * page_size; } private: void * pages_base; @@ -298,7 +298,7 @@ public: return true; } - inline size_t allocatedSize() const + size_t allocatedSize() const { size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize(); size_t code_size = ex_page_arena.getAllocatedSize(); diff --git a/src/Interpreters/JIT/CHJIT.h b/src/Interpreters/JIT/CHJIT.h index fc883802426..89d446fd3b3 100644 --- a/src/Interpreters/JIT/CHJIT.h +++ b/src/Interpreters/JIT/CHJIT.h @@ -85,7 +85,7 @@ public: /** Total compiled code size for module that are currently valid. */ - inline size_t getCompiledCodeSize() const { return compiled_code_size.load(std::memory_order_relaxed); } + size_t getCompiledCodeSize() const { return compiled_code_size.load(std::memory_order_relaxed); } private: diff --git a/src/Interpreters/JIT/CompileDAG.h b/src/Interpreters/JIT/CompileDAG.h index 13ec763b6fc..8db4ac5e110 100644 --- a/src/Interpreters/JIT/CompileDAG.h +++ b/src/Interpreters/JIT/CompileDAG.h @@ -65,17 +65,17 @@ public: nodes.emplace_back(std::move(node)); } - inline size_t getNodesCount() const { return nodes.size(); } - inline size_t getInputNodesCount() const { return input_nodes_count; } + size_t getNodesCount() const { return nodes.size(); } + size_t getInputNodesCount() const { return input_nodes_count; } - inline Node & operator[](size_t index) { return nodes[index]; } - inline const Node & operator[](size_t index) const { return nodes[index]; } + Node & operator[](size_t index) { return nodes[index]; } + const Node & operator[](size_t index) const { return nodes[index]; } - inline Node & front() { return nodes.front(); } - inline const Node & front() const { return nodes.front(); } + Node & front() { return nodes.front(); } + const Node & front() const { return nodes.front(); } - inline Node & back() { return nodes.back(); } - inline const Node & back() const { return nodes.back(); } + Node & back() { return nodes.back(); } + const Node & back() const { return nodes.back(); } private: std::vector nodes; diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h index ff48f34d82c..f15ee2c2fb2 100644 --- a/src/Interpreters/JoinUtils.h +++ b/src/Interpreters/JoinUtils.h @@ -49,7 +49,7 @@ public: return nullptr; } - inline bool isRowFiltered(size_t row) const + bool isRowFiltered(size_t row) const { return !assert_cast(*column).getData()[row]; } diff --git a/src/Interpreters/examples/hash_map_string_3.cpp b/src/Interpreters/examples/hash_map_string_3.cpp index 57e36bed545..44ee3542bd9 100644 --- a/src/Interpreters/examples/hash_map_string_3.cpp +++ b/src/Interpreters/examples/hash_map_string_3.cpp @@ -96,7 +96,7 @@ inline bool operator==(StringRef_CompareAlwaysTrue, StringRef_CompareAlwaysTrue) struct FastHash64 { - static inline uint64_t mix(uint64_t h) + static uint64_t mix(uint64_t h) { h ^= h >> 23; h *= 0x2127599bf4325c37ULL; diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index ab16aaa56ad..58f78e5af42 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -80,7 +80,7 @@ public: bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; } bool checkForSuffixImpl(bool check_eof); - inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); } + void skipSpaces() { if (ignore_spaces) 
skipWhitespaceIfAny(*buf, true); } EscapingRule getEscapingRule() const override { return format_settings.custom.escaping_rule; } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 38870473289..9a7bc03ea78 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -84,7 +84,7 @@ public: void readPrefix(); void skipField(EscapingRule escaping_rule); - inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } + void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } template ReturnType tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end); diff --git a/src/Processors/Port.h b/src/Processors/Port.h index f3c7bbb5fee..2d39f2dd6be 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -38,7 +38,7 @@ public: UInt64 version = 0; UInt64 prev_version = 0; - void inline ALWAYS_INLINE update() + void ALWAYS_INLINE update() { if (version == prev_version && update_list) update_list->push_back(id); @@ -46,7 +46,7 @@ public: ++version; } - void inline ALWAYS_INLINE trigger() { prev_version = version; } + void ALWAYS_INLINE trigger() { prev_version = version; } }; protected: @@ -249,7 +249,7 @@ public: } protected: - void inline ALWAYS_INLINE updateVersion() + void ALWAYS_INLINE updateVersion() { if (likely(update_info)) update_info->update(); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index ae4cf034276..a96402247a2 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -77,12 +77,12 @@ private: bool exception_is_written = false; std::function exception_writer; - inline bool hasDelayed() const + bool hasDelayed() const { return out_maybe_delayed_and_compressed != out_maybe_compressed.get(); } - inline void finalize() + void finalize() { if (finalized) return; @@ -94,7 +94,7 @@ private: out->finalize(); } - inline bool isFinalized() const + bool isFinalized() const { return finalized; } diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index a5dea2f63db..4c8c7974005 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -70,7 +70,7 @@ public: void initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_); - inline bool isInitialized() const { return initialized; } + bool isInitialized() const { return initialized; } std::pair, std::unique_ptr> createReader(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer, bool is_random_accessed); diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h index 782a6b89519..22b3d64b1db 100644 --- a/src/Storages/Cache/RemoteCacheController.h +++ b/src/Storages/Cache/RemoteCacheController.h @@ -45,41 +45,41 @@ public: */ void waitMoreData(size_t start_offset_, size_t end_offset_); - inline size_t size() const { return current_offset; } + size_t size() const { return current_offset; } - inline const std::filesystem::path & getLocalPath() { return local_path; } - inline String getRemotePath() const { return file_metadata_ptr->remote_path; } + const std::filesystem::path & getLocalPath() { return local_path; } + String getRemotePath() const { return file_metadata_ptr->remote_path; } - inline UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; } + 
UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; } bool isModified(IRemoteFileMetadataPtr file_metadata_); - inline void markInvalid() + void markInvalid() { std::lock_guard lock(mutex); valid = false; } - inline bool isValid() + bool isValid() { std::lock_guard lock(mutex); return valid; } - inline bool isEnable() + bool isEnable() { std::lock_guard lock(mutex); return is_enable; } - inline void disable() + void disable() { std::lock_guard lock(mutex); is_enable = false; } - inline void enable() + void enable() { std::lock_guard lock(mutex); is_enable = true; } IRemoteFileMetadataPtr getFileMetadata() { return file_metadata_ptr; } - inline size_t getFileSize() const { return file_metadata_ptr->file_size; } + size_t getFileSize() const { return file_metadata_ptr->file_size; } void startBackgroundDownload(std::unique_ptr in_readbuffer_, BackgroundSchedulePool & thread_pool); diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 536214e159f..20d005c8038 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -65,8 +65,8 @@ public: {ORC_INPUT_FORMAT, FileFormat::ORC}, }; - static inline bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.contains(format_class); } - static inline FileFormat toFileFormat(const String & format_class) + static bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.contains(format_class); } + static FileFormat toFileFormat(const String & format_class) { if (isFormatClass(format_class)) { diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index f160d1c0855..a3bc97779b3 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -82,17 +82,17 @@ public: auto pollTimeout() const { return poll_timeout; } - inline bool hasMorePolledMessages() const + bool hasMorePolledMessages() const { return (stalled_status == NOT_STALLED) && (current != messages.end()); } - inline bool polledDataUnusable() const + bool polledDataUnusable() const { return (stalled_status != NOT_STALLED) && (stalled_status != NO_MESSAGES_RETURNED); } - inline bool isStalled() const { return stalled_status != NOT_STALLED; } + bool isStalled() const { return stalled_status != NOT_STALLED; } void storeLastReadMessageOffset(); void resetToLastCommitted(const char * msg); diff --git a/src/Storages/MergeTree/BackgroundProcessList.h b/src/Storages/MergeTree/BackgroundProcessList.h index c9a4887cca3..bf29aaf32d0 100644 --- a/src/Storages/MergeTree/BackgroundProcessList.h +++ b/src/Storages/MergeTree/BackgroundProcessList.h @@ -87,7 +87,7 @@ public: virtual void onEntryCreate(const Entry & /* entry */) {} virtual void onEntryDestroy(const Entry & /* entry */) {} - virtual inline ~BackgroundProcessList() = default; + virtual ~BackgroundProcessList() = default; }; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c380f99060e..c63f811363a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -456,23 +456,23 @@ public: /// File with compression codec name which was used to compress part columns /// by default. Some columns may have their own compression codecs, but /// default will be stored in this file. 
- static inline constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt"; + static constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt"; /// "delete-on-destroy.txt" is deprecated. It is no longer being created, only is removed. - static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt"; + static constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt"; - static inline constexpr auto UUID_FILE_NAME = "uuid.txt"; + static constexpr auto UUID_FILE_NAME = "uuid.txt"; /// File that contains information about kinds of serialization of columns /// and information that helps to choose kind of serialization later during merging /// (number of rows, number of rows with default values, etc). - static inline constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; + static constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; /// Version used for transactions. - static inline constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; + static constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; - static inline constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt"; + static constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt"; /// One of part files which is used to check how many references (I'd like /// to say hardlinks, but it will confuse even more) we have for the part @@ -484,7 +484,7 @@ public: /// it was mutation without any change for source part. In this case we /// really don't need to remove data from remote FS and need only decrement /// reference counter locally. - static inline constexpr auto FILE_FOR_REFERENCES_CHECK = "checksums.txt"; + static constexpr auto FILE_FOR_REFERENCES_CHECK = "checksums.txt"; /// Checks that all TTLs (table min/max, column ttls, so on) for part /// calculated. Part without calculated TTL may exist if TTL was added after diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index b19c42c8db8..c1514416301 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -41,13 +41,13 @@ struct MergeTreeBlockSizePredictor void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = calculateDecay()); /// Return current block size (after update()) - inline size_t getBlockSize() const + size_t getBlockSize() const { return block_size_bytes; } /// Predicts what number of rows should be read to exhaust byte quota per column - inline size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const + size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const { double max_size_per_row = std::max(std::max(max_size_per_row_fixed, 1), max_size_per_row_dynamic); return (bytes_quota > block_size_rows * max_size_per_row) @@ -56,14 +56,14 @@ struct MergeTreeBlockSizePredictor } /// Predicts what number of rows should be read to exhaust byte quota per block - inline size_t estimateNumRows(size_t bytes_quota) const + size_t estimateNumRows(size_t bytes_quota) const { return (bytes_quota > block_size_bytes) ? 
static_cast((bytes_quota - block_size_bytes) / std::max(1, static_cast(bytes_per_row_current))) : 0; } - inline void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) + void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) { double alpha = std::pow(1. - decay, rows_was_read); double current_ration = rows_was_filtered / std::max(1.0, static_cast(rows_was_read)); diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h index 85006c3ffde..87445c99ade 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h @@ -64,8 +64,8 @@ public: std::string describe() const; }; -constexpr inline auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } -constexpr inline auto getAdaptiveMrkSizeWide() { return sizeof(UInt64) * 3; } +constexpr auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } +constexpr auto getAdaptiveMrkSizeWide() { return sizeof(UInt64) * 3; } inline size_t getAdaptiveMrkSizeCompact(size_t columns_num); } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 9d086e1dc37..f96206ce657 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -307,7 +307,7 @@ public: /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, const DataSourceDescription & data_source_description) const; - inline const String & getReplicaName() const { return replica_name; } + const String & getReplicaName() const { return replica_name; } /// Restores table metadata if ZooKeeper lost it. /// Used only on restarted readonly replicas (not checked). All active (Active) parts are moved to detached/ diff --git a/src/Storages/UVLoop.h b/src/Storages/UVLoop.h index dd1d64973d1..907a3fc0b13 100644 --- a/src/Storages/UVLoop.h +++ b/src/Storages/UVLoop.h @@ -57,9 +57,9 @@ public: } } - inline uv_loop_t * getLoop() { return loop_ptr.get(); } + uv_loop_t * getLoop() { return loop_ptr.get(); } - inline const uv_loop_t * getLoop() const { return loop_ptr.get(); } + const uv_loop_t * getLoop() const { return loop_ptr.get(); } private: std::unique_ptr loop_ptr; diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 1946d8e8905..ed7f80e5df9 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -39,7 +39,7 @@ class Context; class ITableFunction : public std::enable_shared_from_this { public: - static inline std::string getDatabaseName() { return "_table_function"; } + static std::string getDatabaseName() { return "_table_function"; } /// Get the main function name. 
virtual std::string getName() const = 0; From 332ec7c51fe260d43bcd9b9480daaa2e95179dcb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 14:28:04 +0300 Subject: [PATCH 391/651] Update MergeTreeIndexSet.cpp --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 0b7e2e1f942..e9dc638341a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -35,7 +35,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( size_t max_rows_) : index_name(index_name_) , max_rows(max_rows_) - , block(index_sample_block_) + , block(index_sample_block_.cloneEmpty()) { } From 31f0b2f741e8a8c7b06e2271cfd5838a8d16fb32 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 14:52:51 +0300 Subject: [PATCH 392/651] Update MergeTreeIndexSet.cpp --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index e9dc638341a..797455816f0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -291,7 +291,7 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx const auto & column = result.getByPosition(result.columns() - 1).column; for (size_t i = 0; i < size; ++i) - if (column->getBool(i)) + if (column->getUInt(i) & 1) return true; return false; From e18fa68f3d72a0dbed4257c4922a6c534fdb677e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 15:00:14 +0300 Subject: [PATCH 393/651] Update MergeTreeIndexSet.cpp --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 797455816f0..068e08f6819 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -291,7 +291,7 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx const auto & column = result.getByPosition(result.columns() - 1).column; for (size_t i = 0; i < size; ++i) - if (column->getUInt(i) & 1) + if (!column->isNullAt(i) && (column->get64(i) & 1)) return true; return false; From 75d163da12b8c6b5671d40f33eaa12e0409f2566 Mon Sep 17 00:00:00 2001 From: avogar Date: Sun, 19 May 2024 12:17:01 +0000 Subject: [PATCH 394/651] Fix tests --- .../03159_dynamic_type_all_types.reference | 12 ++---------- .../0_stateless/03159_dynamic_type_all_types.sql | 8 ++------ 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference index abecca893f9..72c5b90dbba 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -109,10 +109,6 @@ MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)] Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] -Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) 
{"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string @@ -258,10 +254,6 @@ MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)] Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] -Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string @@ -296,5 +288,5 @@ UInt256 1 UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 -50 -50 +48 +48 diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql index 64fab07ed4f..d302205ca23 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -49,10 +49,6 @@ INSERT INTO t VALUES ('1'::Bool), (0::Bool); -- Dates: use Date and Date32 for days, and DateTime and DateTime64 for instances in time INSERT INTO t VALUES ('2022-01-01'::Date), ('2022-01-01'::Date32), ('2022-01-01 01:01:01'::DateTime), ('2022-01-01 01:01:01.011'::DateTime64); --- JSON -INSERT INTO t VALUES ('{"1":"2"}'::JSON); -INSERT INTO t FORMAT JSONEachRow {"d" : {"k1" : 1, "k2" : 2}} {"d" : {"1" : 2, "2" : 3}} {"d" : {"2020-10-10" : "foo"}}; - -- UUID INSERT INTO t VALUES ('dededdb6-7835-4ce4-8d11-b5de6f2820e9'::UUID); INSERT INTO t VALUES ('00000000-0000-0000-0000-000000000000'::UUID); @@ -86,13 +82,13 @@ INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' yea INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, ['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); -SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d, toString(d); +SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; INSERT INTO t2 SELECT * FROM t; SELECT ''; -SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d, toString(d); +SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; SELECT ''; SELECT uniqExact(dynamicType(d)) t_ FROM t; From bb0fcc929695701ccde2ca49298e50792636fa1c Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 19 May 2024 08:33:37 -0400 Subject: [PATCH 395/651] better tests --- ...te_view_with_sql_security_option.reference | 2 + ...84_create_view_with_sql_security_option.sh | 78 +++++++++---------- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git 
a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 931cf8ac19c..0589fdeef04 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -24,6 +24,8 @@ OK 2 OK OK +OK +100 100 ===== TestGrants ===== OK diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index f1da343da36..f32aee44bee 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -159,6 +159,45 @@ ${CLICKHOUSE_CLIENT} --query "REVOKE SELECT ON $db.test_table FROM $user1" (( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_4" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" (( $(${CLICKHOUSE_CLIENT} --query "INSERT INTO $db.test_table VALUES ('foo'), ('bar');" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --query "GRANT INSERT ON $db.source TO $user2" +${CLICKHOUSE_CLIENT} --user $user2 --query "INSERT INTO source SELECT * FROM generateRandom() LIMIT 100" + +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination1" +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination2" echo "===== TestGrants =====" ${CLICKHOUSE_CLIENT} --query "GRANT CREATE ON *.* TO $user1" @@ -192,45 +231,6 @@ ${CLICKHOUSE_CLIENT} --user $user1 --query " ${CLICKHOUSE_CLIENT} --query "GRANT SET DEFINER ON $user2 TO $user1" -${CLICKHOUSE_CLIENT} --multiquery < Date: Sun, 19 May 2024 12:51:14 +0000 Subject: [PATCH 396/651] Restore the warning --- .clang-tidy | 2 ++ src/Common/CurrentThread.h | 2 +- src/Common/findExtreme.cpp | 4 ++-- src/Functions/ExtractString.h | 6 +++--- .../FunctionsLanguageClassification.cpp | 2 +- .../FunctionsProgrammingClassification.cpp | 2 +- src/Functions/FunctionsStringHash.cpp | 20 +++++++++---------- src/Functions/FunctionsStringSimilarity.cpp | 6 +++--- .../FunctionsTonalityClassification.cpp | 2 +- src/Functions/PolygonUtils.h | 2 +- src/Processors/Port.h | 6 +++--- 11 files changed, 28 insertions(+), 26 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 66417c41c46..7e8f604467b 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -118,6 +118,8 @@ Checks: [ '-readability-magic-numbers', '-readability-named-parameter', '-readability-redundant-declaration', + '-readability-redundant-inline-specifier', # generally useful but incompatible with __attribute((always_inline))__ (aka. ALWAYS_INLINE). 
+ # it has an effect only if combined with `inline`: https://godbolt.org/z/Eefd74qdM '-readability-simplify-boolean-expr', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index 8dade8c6fd5..e1eb926c951 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -62,7 +62,7 @@ public: static void updatePerformanceCountersIfNeeded(); static ProfileEvents::Counters & getProfileEvents(); - static MemoryTracker * getMemoryTracker() + ALWAYS_INLINE inline static MemoryTracker * getMemoryTracker() { if (!current_thread) [[unlikely]] return nullptr; diff --git a/src/Common/findExtreme.cpp b/src/Common/findExtreme.cpp index a99b1f2dd3d..ce3bbb86d7c 100644 --- a/src/Common/findExtreme.cpp +++ b/src/Common/findExtreme.cpp @@ -11,13 +11,13 @@ namespace DB template struct MinComparator { - static ALWAYS_INLINE const T & cmp(const T & a, const T & b) { return std::min(a, b); } + static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::min(a, b); } }; template struct MaxComparator { - static ALWAYS_INLINE const T & cmp(const T & a, const T & b) { return std::max(a, b); } + static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::max(a, b); } }; MULTITARGET_FUNCTION_AVX2_SSE42( diff --git a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h index 5b8fa41958a..aa0e1b04835 100644 --- a/src/Functions/ExtractString.h +++ b/src/Functions/ExtractString.h @@ -20,7 +20,7 @@ namespace DB // includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word struct ExtractStringImpl { - static ALWAYS_INLINE const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) + static ALWAYS_INLINE inline const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) { // jump separators while (pos < end && isUTF8Sep(*pos)) @@ -35,10 +35,10 @@ struct ExtractStringImpl } // we use ASCII non-alphanum character as UTF8 separator - static ALWAYS_INLINE bool isUTF8Sep(const UInt8 c) { return c < 128 && !isAlphaNumericASCII(c); } + static ALWAYS_INLINE inline bool isUTF8Sep(const UInt8 c) { return c < 128 && !isAlphaNumericASCII(c); } // read one UTF8 character - static ALWAYS_INLINE void readOneUTF8Code(const UInt8 *& pos, const UInt8 * end) + static ALWAYS_INLINE inline void readOneUTF8Code(const UInt8 *& pos, const UInt8 * end) { size_t length = UTF8::seqLength(*pos); diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 94391606762..55485d41ce0 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -31,7 +31,7 @@ extern const int SUPPORT_IS_DISABLED; struct FunctionDetectLanguageImpl { - static ALWAYS_INLINE std::string_view codeISO(std::string_view code_string) + static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string) { if (code_string.ends_with("-Latn")) code_string.remove_suffix(code_string.size() - 5); diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index 8e9eff50aab..a93e1d9a87d 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -21,7 +21,7 @@ namespace DB struct FunctionDetectProgrammingLanguageImpl { /// Calculate total weight - static ALWAYS_INLINE Float64 stateMachine( + static ALWAYS_INLINE inline Float64 stateMachine( const 
FrequencyHolder::Map & standard, const std::unordered_map & model) { diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index cd33564caf9..0bf6e39e651 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -99,7 +99,7 @@ struct Hash } template - static ALWAYS_INLINE UInt64 shingleHash(UInt64 crc, const UInt8 * start, size_t size) + static ALWAYS_INLINE inline UInt64 shingleHash(UInt64 crc, const UInt8 * start, size_t size) { if (size & 1) { @@ -153,7 +153,7 @@ struct Hash } template - static ALWAYS_INLINE UInt64 shingleHash(const std::vector & shingle, size_t offset = 0) + static ALWAYS_INLINE inline UInt64 shingleHash(const std::vector & shingle, size_t offset = 0) { UInt64 crc = -1ULL; @@ -177,14 +177,14 @@ struct SimHashImpl static constexpr size_t min_word_size = 4; /// Update fingerprint according to hash_value bits. - static ALWAYS_INLINE void updateFingerVector(Int64 * finger_vec, UInt64 hash_value) + static ALWAYS_INLINE inline void updateFingerVector(Int64 * finger_vec, UInt64 hash_value) { for (size_t i = 0; i < 64; ++i) finger_vec[i] += (hash_value & (1ULL << i)) ? 1 : -1; } /// Return a 64 bit value according to finger_vec. - static ALWAYS_INLINE UInt64 getSimHash(const Int64 * finger_vec) + static ALWAYS_INLINE inline UInt64 getSimHash(const Int64 * finger_vec) { UInt64 res = 0; @@ -200,7 +200,7 @@ struct SimHashImpl // for each ngram, calculate a 64 bit hash value, and update the vector according the hash value // finally return a 64 bit value(UInt64), i'th bit is 1 means vector[i] > 0, otherwise, vector[i] < 0 - static ALWAYS_INLINE UInt64 ngramHashASCII(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE inline UInt64 ngramHashASCII(const UInt8 * data, size_t size, size_t shingle_size) { if (size < shingle_size) return Hash::shingleHash(-1ULL, data, size); @@ -217,7 +217,7 @@ struct SimHashImpl return getSimHash(finger_vec); } - static ALWAYS_INLINE UInt64 ngramHashUTF8(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE inline UInt64 ngramHashUTF8(const UInt8 * data, size_t size, size_t shingle_size) { const UInt8 * start = data; const UInt8 * end = data + size; @@ -259,7 +259,7 @@ struct SimHashImpl // 2. 
next, we extract one word each time, and calculate a new hash value of the new word,then use the latest N hash // values to calculate the next word shingle hash value - static ALWAYS_INLINE UInt64 wordShingleHash(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE inline UInt64 wordShingleHash(const UInt8 * data, size_t size, size_t shingle_size) { const UInt8 * start = data; const UInt8 * end = data + size; @@ -400,7 +400,7 @@ struct MinHashImpl using MaxHeap = Heap>; using MinHeap = Heap>; - static ALWAYS_INLINE void ngramHashASCII( + static ALWAYS_INLINE inline void ngramHashASCII( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, @@ -429,7 +429,7 @@ struct MinHashImpl } } - static ALWAYS_INLINE void ngramHashUTF8( + static ALWAYS_INLINE inline void ngramHashUTF8( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, @@ -472,7 +472,7 @@ struct MinHashImpl // MinHash word shingle hash value calculate function: String ->Tuple(UInt64, UInt64) // for each word shingle, we calculate a hash value, but in fact, we just maintain the // K minimum and K maximum hash value - static ALWAYS_INLINE void wordShingleHash( + static ALWAYS_INLINE inline void wordShingleHash( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index 5224c76d7b0..7b3f2337c89 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -85,7 +85,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE void unrollLowering(Container & cont, const std::index_sequence &) + static ALWAYS_INLINE inline void unrollLowering(Container & cont, const std::index_sequence &) { ((cont[Offset + I] = std::tolower(cont[Offset + I])), ...); } @@ -195,7 +195,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE size_t calculateNeedleStats( + static ALWAYS_INLINE inline size_t calculateNeedleStats( const char * data, const size_t size, NgramCount * ngram_stats, @@ -228,7 +228,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE UInt64 calculateHaystackStatsAndMetric( + static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric( const char * data, const size_t size, NgramCount * ngram_stats, diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index a8cc09186f6..3de38d99c88 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -18,7 +18,7 @@ namespace DB */ struct FunctionDetectTonalityImpl { - static ALWAYS_INLINE Float32 detectTonality( + static ALWAYS_INLINE inline Float32 detectTonality( const UInt8 * str, const size_t str_len, const FrequencyHolder::Map & emotional_dict) diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 0c57fd7f0b5..4ab146b085f 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -124,7 +124,7 @@ public: bool hasEmptyBound() const { return has_empty_bound; } - bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const + bool ALWAYS_INLINE inline contains(CoordinateType x, CoordinateType y) const { Point point(x, y); diff --git a/src/Processors/Port.h b/src/Processors/Port.h index 2d39f2dd6be..f3c7bbb5fee 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -38,7 +38,7 @@ public: UInt64 version = 0; UInt64 prev_version = 0; - void ALWAYS_INLINE update() + void inline ALWAYS_INLINE 
update() { if (version == prev_version && update_list) update_list->push_back(id); @@ -46,7 +46,7 @@ public: ++version; } - void ALWAYS_INLINE trigger() { prev_version = version; } + void inline ALWAYS_INLINE trigger() { prev_version = version; } }; protected: @@ -249,7 +249,7 @@ public: } protected: - void ALWAYS_INLINE updateVersion() + void inline ALWAYS_INLINE updateVersion() { if (likely(update_info)) update_info->update(); From 639f7f166f6ba1f4c078b30e66fd40605b9866f5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 12:53:17 +0000 Subject: [PATCH 397/651] Fix typo --- .clang-tidy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 7e8f604467b..7dafaeb9e3f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -119,7 +119,7 @@ Checks: [ '-readability-named-parameter', '-readability-redundant-declaration', '-readability-redundant-inline-specifier', # generally useful but incompatible with __attribute((always_inline))__ (aka. ALWAYS_INLINE). - # it has an effect only if combined with `inline`: https://godbolt.org/z/Eefd74qdM + # ALWAYS_INLINE has an effect only if combined with `inline`: https://godbolt.org/z/Eefd74qdM '-readability-simplify-boolean-expr', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', From ff392b0aeb668d34049dfaee0966fba91186227c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 13:00:30 +0000 Subject: [PATCH 398/651] Minor corrections --- src/Common/CurrentThread.h | 2 +- src/Functions/PolygonUtils.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index e1eb926c951..53b61ba315f 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -62,7 +62,7 @@ public: static void updatePerformanceCountersIfNeeded(); static ProfileEvents::Counters & getProfileEvents(); - ALWAYS_INLINE inline static MemoryTracker * getMemoryTracker() + inline ALWAYS_INLINE static MemoryTracker * getMemoryTracker() { if (!current_thread) [[unlikely]] return nullptr; diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 4ab146b085f..c4851718da6 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -124,7 +124,7 @@ public: bool hasEmptyBound() const { return has_empty_bound; } - bool ALWAYS_INLINE inline contains(CoordinateType x, CoordinateType y) const + inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const { Point point(x, y);
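The interplay restored above -- `__attribute__((always_inline))` taking effect only when the function is also declared `inline` -- can be seen in a minimal standalone sketch. The macro below is a simplified stand-in for the real ALWAYS_INLINE definition, not ClickHouse code:

#include <cstdio>

/// Simplified stand-in for the ALWAYS_INLINE macro discussed in .clang-tidy above.
#define ALWAYS_INLINE __attribute__((__always_inline__))

/// Per the restored comment, the attribute works only together with `inline`,
/// which is why readability-redundant-inline-specifier must stay disabled:
/// it would suggest deleting exactly this `inline`.
inline ALWAYS_INLINE int addOne(int x)
{
    return x + 1;
}

int main()
{
    printf("%d\n", addOne(41));
    return 0;
}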
const auto & column = result.getByPosition(result.columns() - 1).column; + const auto & column = result.getByName(actions_output_column_name).column; for (size_t i = 0; i < size; ++i) if (!column->isNullAt(i) && (column->get64(i) & 1)) From a28309689f26e161dfbaa014bc51dea7460de30f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 16:13:58 +0300 Subject: [PATCH 400/651] Update MergeTreeIndexSet.h --- src/Storages/MergeTree/MergeTreeIndexSet.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 3348b5fbe34..901653e47d6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -126,6 +126,7 @@ private: std::unordered_set key_columns; ExpressionActionsPtr actions; + String actions_output_column_name; }; From f143ae6969c77b5ebe44ec4865251caaa18db7fa Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 14:31:21 +0000 Subject: [PATCH 401/651] Fix build --- src/Coordination/KeeperServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b07c90b8660..736a01443ce 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -990,7 +990,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return Accepted; } - chassert(false); + std::unreachable(); } ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) From 524f289f47fc6c7cd7b0ac52e6827f8636907160 Mon Sep 17 00:00:00 2001 From: Yohann Jardin Date: Sun, 19 May 2024 17:20:22 +0200 Subject: [PATCH 402/651] empty commit From 513900cb524d7b3e96cfbe8b8b56d9b0b0eb6070 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 19 May 2024 15:44:19 +0000 Subject: [PATCH 403/651] assume columns from projection are aggregates --- src/Planner/PlannerExpressionAnalysis.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index e7d553af944..399bbfc67cf 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -454,6 +454,13 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); std::unordered_set aggregation_keys; + + auto projection_expression_dag = std::make_shared(); + for (const auto & node : query_node.getProjection()) + actions_visitor.visit(projection_expression_dag, node); + for (const auto & node : projection_expression_dag->getNodes()) + aggregation_keys.insert(node.result_name); + if (aggregation_analysis_result_optional) aggregation_keys.insert(aggregation_analysis_result_optional->aggregation_keys.begin(), aggregation_analysis_result_optional->aggregation_keys.end()); From 5698ef698d20c12d83fa7f685cbfee9352b4583d Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 18 May 2024 21:17:22 +0200 Subject: [PATCH 404/651] check overall wf status in mergeable check --- .github/workflows/pull_request.yml | 18 +++++++----------- tests/ci/ci.py | 8 ++++---- tests/ci/commit_status_helper.py | 4 ++++ tests/ci/finish_check.py | 2 +- tests/ci/merge_pr.py | 15 ++++++++++++++- 5 files changed, 30 insertions(+), 17 deletions(-) diff --git a/.github/workflows/pull_request.yml 
b/.github/workflows/pull_request.yml index f2e4b5f328d..21c2e48677d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -140,13 +140,11 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# - # Reports should by run even if Builds_1/2 fail, so put them separatly in wf (not in Tests_1/2) + # Reports should be run even if Builds_1/2 fail, so put them separately in wf (not in Tests_1/2) Builds_1_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} - needs: - - RunConfig - - Builds_1 + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + needs: [RunConfig, StyleCheck, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check @@ -154,10 +152,8 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} Builds_2_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} - needs: - - RunConfig - - Builds_2 + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} + needs: [RunConfig, StyleCheck, Builds_2] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse special build check @@ -165,7 +161,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !cancelled() }} + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] runs-on: [self-hosted, style-checker-aarch64] steps: @@ -176,7 +172,7 @@ jobs: - name: Check and set merge status run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 merge_pr.py --set-ci-status + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} ################################# Stage Final ################################# # diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 3aa8f1bb813..3a616c8aad6 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -886,9 +886,9 @@ class CiOptions: for job in job_with_parents: if job in jobs_to_do and job not in jobs_to_do_requested: jobs_to_do_requested.append(job) - assert ( - jobs_to_do_requested - ), f"Include tags are set but no job configured - Invalid tags, probably [{self.include_keywords}]" + print( + f"WARNING: Include tags are set but no job configured - Invalid tags, probably [{self.include_keywords}]" + ) if JobNames.STYLE_CHECK not in jobs_to_do_requested: # Style check must not be omitted jobs_to_do_requested.append(JobNames.STYLE_CHECK) @@ -898,7 +898,7 @@ class CiOptions: if self.ci_sets: for tag in self.ci_sets: label_config = CI_CONFIG.get_label_config(tag) - assert label_config, f"Unknonwn tag [{tag}]" + assert label_config, f"Unknown tag [{tag}]" print( f"NOTE: CI Set's tag: [{tag}], add jobs: [{label_config.run_jobs}]" ) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index ec9746e9af9..733b07813a5 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -471,6 +471,7 @@ def
trigger_mergeable_check( statuses: CommitStatuses, hide_url: bool = False, set_if_green: bool = False, + workflow_failed: bool = False, ) -> StatusType: """calculate and update StatusNames.MERGEABLE""" required_checks = [ @@ -501,6 +502,9 @@ def trigger_mergeable_check( if fail: description = "failed: " + ", ".join(fail) state = FAILURE + elif workflow_failed: + description = "check workflow failures" + state = FAILURE description = format_description(description) if not set_if_green and state == SUCCESS: diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 615b26b51f0..b31be7654d3 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -17,7 +17,7 @@ from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import PENDING, SUCCESS from synchronizer_utils import SYNC_BRANCH_PREFIX -from tests.ci.env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY +from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY def main(): diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 519fa5fcebb..500de4eb718 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -182,6 +182,12 @@ def parse_args() -> argparse.Namespace: action="store_true", help="if set, only update/set Mergeable Check status", ) + parser.add_argument( + "--wf-status", + type=str, + default="", + help="overall workflow status [success|failure]. used with --set-ci-status only", + ) parser.add_argument( "--check-approved", action="store_true", @@ -237,10 +243,17 @@ def main(): repo = gh.get_repo(args.repo) if args.set_ci_status: + assert args.wf_status in ("failure", "success") # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check(commit, statuses, hide_url=False, set_if_green=True) + trigger_mergeable_check( + commit, + statuses, + hide_url=False, + set_if_green=True, + workflow_failed=(args.wf_status != "success"), + ) return # An ugly and not nice fix to patch the wrong organization URL, From 83572b7f05e0ba68accd0ea766f243d7adbaf070 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 17:23:52 +0000 Subject: [PATCH 405/651] Trigger build From 4a8e663b1ae72b1d57ad15adce9eddbb3f6e1d28 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 15:45:23 +0000 Subject: [PATCH 406/651] Fix clang-tidy after clang-18 transition --- .clang-tidy | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index e2f318562ec..d4c57a52dd6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -22,6 +22,7 @@ Checks: [ '-bugprone-exception-escape', '-bugprone-forward-declaration-namespace', '-bugprone-implicit-widening-of-multiplication-result', + '-bugprone-multi-level-implicit-pointer-conversion', '-bugprone-narrowing-conversions', '-bugprone-not-null-terminated-result', '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged @@ -98,6 +99,7 @@ Checks: [ '-modernize-use-nodiscard', '-modernize-use-trailing-return-type', + '-performance-enum-size', '-performance-inefficient-string-concatenation', '-performance-no-int-to-ptr', '-performance-avoid-endl', @@ -105,6 +107,7 @@ Checks: [ '-portability-simd-intrinsics', + '-readability-avoid-nested-conditional-operator', '-readability-avoid-unconditional-preprocessor-if', '-readability-braces-around-statements', 
'-readability-convert-member-functions-to-static', @@ -118,6 +121,12 @@ Checks: [ '-readability-magic-numbers', '-readability-named-parameter', '-readability-redundant-declaration', + '-readability-redundant-inline-specifier', + '-readability-redundant-member-init', # useful but triggers another problem. Imagine a struct S with String members. If S is + # initialized via designated initializer, then the caller needs to initialize *all* members + # (to avoid warning `missing-field-initializers`) unless the members are default-initialized + # in S. So this clang-tidy warning prevents default-initialization and writing out all members + # in a designated initializer is ugly... '-readability-simplify-boolean-expr', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', @@ -125,17 +134,6 @@ Checks: [ '-zircon-*', - # These are new in clang-18, and we have to sort them out: '-readability-avoid-nested-conditional-operator', '-modernize-use-designated-initializers', '-performance-enum-size', '-readability-redundant-inline-specifier', '-readability-redundant-member-init', '-bugprone-crtp-constructor-accessibility', '-bugprone-suspicious-stringview-data-usage', '-bugprone-multi-level-implicit-pointer-conversion', '-cert-err33-c', # This is a good check, but clang-tidy crashes, see https://github.com/llvm/llvm-project/issues/91872 '-modernize-use-constraints', # https://github.com/abseil/abseil-cpp/issues/1667 From 56e8027883f7732cf090f835b6e25dd329307bc1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 17:53:06 +0000 Subject: [PATCH 407/651] Improve comment --- .clang-tidy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index d4c57a52dd6..219ac263ab3 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -122,11 +122,11 @@ Checks: [ '-readability-named-parameter', '-readability-redundant-declaration', '-readability-redundant-inline-specifier', - '-readability-redundant-member-init', # useful but triggers another problem. Imagine a struct S with String members. If S is - # initialized via designated initializer, then the caller needs to initialize *all* members - # (to avoid warning `missing-field-initializers`) unless the members are default-initialized - # in S. So this clang-tidy warning prevents default-initialization and writing out all members - # in a designated initializer is ugly... + '-readability-redundant-member-init', # Useful but triggers another problem. Imagine a struct S with multiple String members. Structs are often instantiated via designated + # initializer S s{.s1 = [...], .s2 = [...], [...]}. In this case, compiler warning `missing-field-initializers` requires specifying all members which are not in-struct + # initialized (example: s1 in struct S { String s1; String s2{};}; is not in-struct initialized, therefore it must be specified at instantiation time). As explicitly + # specifying all members is tedious for large structs, `missing-field-initializers` makes programmers initialize as many members as possible in-struct. Clang-tidy + # warning `readability-redundant-member-init` does the opposite thing, so the two are not compatible with each other. '-readability-simplify-boolean-expr', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix',
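The conflict described in the comment can be reproduced in a few lines. A self-contained sketch, with std::string standing in for ClickHouse's String, assuming C++20 designated initializers and a compiler with -Wmissing-field-initializers enabled:

#include <string>

struct S
{
    std::string s1;   /// no in-struct initializer: must be provided at the call site
    std::string s2{}; /// in-struct initialized: readability-redundant-member-init would flag this `{}`
};

int main()
{
    /// Per the comment above, -Wmissing-field-initializers accepts omitting s2 here
    /// only because s2 has a default member initializer -- the very pattern that
    /// readability-redundant-member-init would remove.
    S s{.s1 = "designated"};
    return s.s1.size() == 10 ? 0 : 1;
}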
From 1293a0f79572213f2cd90f5a6f09fbe39d8dbf9e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 18:47:58 +0000 Subject: [PATCH 408/651] Cosmetics, pt. I --- src/Functions/generateSnowflakeID.cpp | 95 +++++++++++++-------------- 1 file changed, 45 insertions(+), 50 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 1decda0ab46..28fc2eb6b05 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -18,8 +18,7 @@ namespace ErrorCodes namespace { -/* - Snowflake ID +/* Snowflake ID https://en.wikipedia.org/wiki/Snowflake_ID 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ | | machine_id | machine_seq_num | ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds -- The middle 10 bits are the machine ID. -- The last 12 bits decode to number of ids processed by the machine at the given millisecond. +- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) +- The middle 10 bits are the machine ID +- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes */ -constexpr auto timestamp_size = 41; -constexpr auto machine_id_size = 10; -constexpr auto machine_seq_num_size = 12; +constexpr auto timestamp_bits_count = 41; +constexpr auto machine_id_bits_count = 10; +constexpr auto machine_seq_num_bits_count = 12; -constexpr int64_t timestamp_mask = ((1LL << timestamp_size) - 1) << (machine_id_size + machine_seq_num_size); -constexpr int64_t machine_id_mask = ((1LL << machine_id_size) - 1) << machine_seq_num_size; -constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_size) - 1; +constexpr int64_t timestamp_mask = ((1LL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); +constexpr int64_t machine_id_mask = ((1LL << machine_id_bits_count) - 1) << machine_seq_num_bits_count; +constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_bits_count) - 1; constexpr int64_t max_machine_seq_num = machine_seq_num_mask; Int64 getMachineID() { - auto serverUUID = ServerUUID::get(); - - // hash serverUUID into 64 bits - Int64 h = UUIDHelpers::getHighBytes(serverUUID); - Int64 l = UUIDHelpers::getLowBytes(serverUUID); - return ((h * 11) ^ (l * 17)) & machine_id_mask; + UUID server_uuid = ServerUUID::get(); + /// hash into 64 bits + UInt64 hi = UUIDHelpers::getHighBytes(server_uuid); + UInt64 lo = UUIDHelpers::getLowBytes(server_uuid); + return ((hi * 11) ^ (lo * 17)) & machine_id_mask; } Int64 getTimestamp() { - const auto tm_point = std::chrono::system_clock::now(); - return std::chrono::duration_cast( - tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1); + auto now = std::chrono::system_clock::now(); + auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); + return ticks_since_epoch & ((1LL << timestamp_bits_count) - 1); } } @@ -66,16 +64,11 @@ Int64 getTimestamp() class FunctionSnowflakeID : public IFunction { private: - mutable std::atomic lowest_available_snowflake_id{0}; - // 1 atomic value because we don't want to use mutex + mutable std::atomic lowest_available_snowflake_id = 0; /// atomic to avoid a mutex public: static constexpr auto name = "generateSnowflakeID"; - - static FunctionPtr create(ContextPtr /*context*/) - { - return std::make_shared(); - } + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -95,31 +88,34 @@ public:
return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - Int64 size64 = static_cast(input_rows_count); + vec_to.resize(input_rows_count); if (input_rows_count == 0) { return col_res; } - Int64 machine_id = getMachineID(); + const Int64 machine_id = getMachineID(); Int64 current_timestamp = getTimestamp(); Int64 current_machine_seq_num; - Int64 available_id, next_available_id; + Int64 available_snowflake_id, next_available_snowflake_id; + + const Int64 size64 = static_cast(input_rows_count); + do { - available_id = lowest_available_snowflake_id.load(); - Int64 available_timestamp = (available_id & timestamp_mask) >> (machine_id_size + machine_seq_num_size); - Int64 available_machine_seq_num = available_id & machine_seq_num_mask; + available_snowflake_id = lowest_available_snowflake_id.load(); + const Int64 available_timestamp = (available_snowflake_id & timestamp_mask) >> (machine_id_bits_count + machine_seq_num_bits_count); + const Int64 available_machine_seq_num = available_snowflake_id & machine_seq_num_mask; if (current_timestamp > available_timestamp) { + /// handle overflow current_machine_seq_num = 0; } else @@ -128,24 +124,23 @@ public: current_machine_seq_num = available_machine_seq_num; } - // calculate new `lowest_available_snowflake_id` + /// calculate new lowest_available_snowflake_id + const Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); Int64 new_timestamp; - Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); - if (size64 >= seq_nums_in_current_timestamp_left) { + if (size64 >= seq_nums_in_current_timestamp_left) new_timestamp = current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num; - } else { + else new_timestamp = current_timestamp; - } - Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; - next_available_id = (new_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | new_machine_seq_num; + const Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; + next_available_snowflake_id = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_machine_seq_num; } - while (!lowest_available_snowflake_id.compare_exchange_strong(available_id, next_available_id)); - // failed CAS => another thread updated `lowest_available_snowflake_id` - // successful CAS => we have our range of exclusive values + while (!lowest_available_snowflake_id.compare_exchange_strong(available_snowflake_id, next_available_snowflake_id)); + /// failed CAS => another thread updated `lowest_available_snowflake_id` + /// successful CAS => we have our range of exclusive values - for (Int64 & el : vec_to) + for (Int64 & to_row : vec_to) { - el = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | current_machine_seq_num; + to_row = (current_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | current_machine_seq_num; if (current_machine_seq_num++ == max_machine_seq_num) { current_machine_seq_num = 0; @@ -163,10 +158,10 @@ REGISTER_FUNCTION(GenerateSnowflakeID) factory.registerFunction(FunctionDocumentation { .description=R"( -Generates Snowflake ID -- unique identificators contains: -- The first 41 (+ 1 top 
zero bit) bits is timestamp in Unix time milliseconds -- The middle 10 bits are the machine ID. -- The last 12 bits decode to number of ids processed by the machine at the given millisecond. +Generates a SnowflakeID -- a unique identifier containing: +- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) +- The middle 10 bits are the machine ID +- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function guarantees strict monotony on 1 machine and differences in values obtained on different machines. From 08a3c16a5aca95c73cc0ea1aaf2d57edb6acaef2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 18:53:51 +0000 Subject: [PATCH 409/651] Cosmetics, pt. II --- src/Functions/generateSnowflakeID.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 28fc2eb6b05..d70b8349cd8 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -105,7 +105,7 @@ public: Int64 available_snowflake_id, next_available_snowflake_id; - const Int64 size64 = static_cast(input_rows_count); + const Int64 input_rows_count_signed = static_cast(input_rows_count); do { @@ -127,11 +127,11 @@ public: /// calculate new lowest_available_snowflake_id const Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); Int64 new_timestamp; - if (size64 >= seq_nums_in_current_timestamp_left) - new_timestamp = current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num; + if (input_rows_count_signed >= seq_nums_in_current_timestamp_left) + new_timestamp = current_timestamp + 1 + (input_rows_count_signed - seq_nums_in_current_timestamp_left) / max_machine_seq_num; else new_timestamp = current_timestamp; - const Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; + const Int64 new_machine_seq_num = (current_machine_seq_num + input_rows_count_signed) & machine_seq_num_mask; next_available_snowflake_id = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_machine_seq_num; } while (!lowest_available_snowflake_id.compare_exchange_strong(available_snowflake_id, next_available_snowflake_id));
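For illustration, a value with the layout documented above decomposes with plain shifts and masks. A minimal standalone sketch assuming the same 41/10/12 bit split (the concrete ID below is made up):

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t machine_id_bits = 10;
    const uint64_t machine_seq_num_bits = 12;
    const uint64_t id = 7186199113370329099ULL; /// hypothetical snowflake ID

    /// Top 41 (+ 1 zero) bits: milliseconds since Unix epoch.
    const uint64_t timestamp_ms = id >> (machine_id_bits + machine_seq_num_bits);
    /// Middle 10 bits: machine ID.
    const uint64_t machine_id = (id >> machine_seq_num_bits) & ((1ULL << machine_id_bits) - 1);
    /// Low 12 bits: per-millisecond sequence counter.
    const uint64_t seq_num = id & ((1ULL << machine_seq_num_bits) - 1);

    printf("ms since epoch: %llu, machine id: %llu, seq num: %llu\n",
           static_cast<unsigned long long>(timestamp_ms),
           static_cast<unsigned long long>(machine_id),
           static_cast<unsigned long long>(seq_num));
    return 0;
}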
From 2765fd951cbcd6f5c576ee1919ae644cb4d76256 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 19 May 2024 21:02:12 +0200 Subject: [PATCH 410/651] Properly support native copy for azure --- src/Backups/BackupFactory.h | 1 + src/Backups/BackupIO_AzureBlobStorage.cpp | 38 ++-- src/Backups/BackupIO_AzureBlobStorage.h | 15 +- src/Backups/BackupSettings.cpp | 1 + src/Backups/BackupSettings.h | 3 + src/Backups/BackupsWorker.cpp | 1 + .../registerBackupEngineAzureBlobStorage.cpp | 22 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 7 +- .../AzureBlobStorage/AzureObjectStorage.h | 6 +- .../ObjectStorages/ObjectStorageFactory.cpp | 5 +- .../copyAzureBlobStorageFile.cpp | 1 + src/Storages/StorageAzureBlob.cpp | 10 +- src/Storages/StorageAzureBlob.h | 2 + .../TableFunctionAzureBlobStorage.cpp | 4 +- .../TableFunctionAzureBlobStorageCluster.cpp | 4 +- .../__init__.py | 1 + .../test.py | 215 ++++++++++++++++++ 17 files changed, 301 insertions(+), 35 deletions(-) create mode 100644
tests/integration/test_azure_blob_storage_native_copy/__init__.py create mode 100644 tests/integration/test_azure_blob_storage_native_copy/test.py diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index 4e752508577..e13a9a12ca2 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -39,6 +39,7 @@ public: std::optional backup_uuid; bool deduplicate_files = true; bool allow_s3_native_copy = true; + bool allow_azure_native_copy = true; bool use_same_s3_credentials_for_base_backup = false; bool azure_attempt_to_create_container = true; ReadSettings read_settings; diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index a3998431674..672a68e089f 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -31,22 +31,28 @@ namespace ErrorCodes BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( StorageAzureBlob::Configuration configuration_, + bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURLWithContainer(), false, false} , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); - object_storage = std::make_unique("BackupReaderAzureBlobStorage", - std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + object_storage = std::make_unique( + "BackupReaderAzureBlobStorage", + std::move(client_ptr), + StorageAzureBlob::createSettings(context_), + configuration.container, + configuration.getConnectionURLWithContainer()); + client = object_storage->getAzureBlobStorageClient(); - settings = object_storage->getSettings(); + auto settings_copy = *object_storage->getSettings(); + settings_copy.use_native_copy = allow_azure_native_copy; + settings = std::make_unique(settings_copy); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; @@ -76,9 +82,9 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) { auto destination_data_source_description = destination_disk->getDataSourceDescription(); - if ((destination_data_source_description.type == DataSourceType::ObjectStorage) - && (destination_data_source_description.object_storage_type == ObjectStorageType::Azure) - && (destination_data_source_description.is_encrypted == encrypted_in_backup)) + LOG_TRACE(log, "Source description {} destination description {}", data_source_description.description, destination_data_source_description.description); + if (destination_data_source_description.sameKind(data_source_description) + && destination_data_source_description.is_encrypted == encrypted_in_backup) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) ->
@@ -116,12 +122,13 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup,
 
 BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
     StorageAzureBlob::Configuration configuration_,
+    bool allow_azure_native_copy,
     const ReadSettings & read_settings_,
     const WriteSettings & write_settings_,
     const ContextPtr & context_,
     bool attempt_to_create_container)
     : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
-    , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
+    , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURLWithContainer(), false, false}
     , configuration(configuration_)
 {
     auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container);
@@ -130,9 +137,12 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
     object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
         std::move(client_ptr),
         StorageAzureBlob::createSettings(context_),
-        configuration_.container);
+        configuration_.container,
+        configuration.getConnectionURLWithContainer());
     client = object_storage->getAzureBlobStorageClient();
-    settings = object_storage->getSettings();
+    auto settings_copy = *object_storage->getSettings();
+    settings_copy.use_native_copy = allow_azure_native_copy;
+    settings = std::make_unique<AzureObjectStorageSettings>(settings_copy);
 }
 
 void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
@@ -140,7 +150,9 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu
 {
     /// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible.
     auto source_data_source_description = src_disk->getDataSourceDescription();
-    if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted))
+    LOG_TRACE(log, "Source description {} destination description {}", source_data_source_description.description, data_source_description.description);
+    if (source_data_source_description.sameKind(data_source_description)
+        && source_data_source_description.is_encrypted == copy_encrypted)
     {
         /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container.
         /// In this case we can't use the native copy.
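Besides the per-query `allow_azure_native_copy` backup setting introduced here, native copy is also opt-in per disk via `use_native_copy` in the disk configuration (the option is read in `AzureBlobStorageAuth.cpp` later in this series). A minimal sketch of such a disk declaration — the endpoint, container, and credentials below are illustrative placeholders mirroring the Azurite setup in the integration test at the end of this patch:

```xml
<disk_azure>
    <type>object_storage</type>
    <object_storage_type>azure_blob_storage</object_storage_type>
    <storage_account_url>http://azurite1:10000/devstoreaccount1</storage_account_url>
    <container_name>cont</container_name>
    <account_name>devstoreaccount1</account_name>
    <account_key>REDACTED</account_key>
    <!-- opt in to server-side (native) copy instead of read+write copy -->
    <use_native_copy>true</use_native_copy>
</disk_azure>
```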
diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index f0b9aace4d4..3a909ab684a 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -16,7 +16,12 @@ namespace DB class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: - BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderAzureBlobStorage( + StorageAzureBlob::Configuration configuration_, + bool allow_azure_native_copy, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_); ~BackupReaderAzureBlobStorage() override; bool fileExists(const String & file_name) override; @@ -37,7 +42,13 @@ private: class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: - BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); + BackupWriterAzureBlobStorage( + StorageAzureBlob::Configuration configuration_, + bool allow_azure_native_copy, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_, + bool attempt_to_create_container); ~BackupWriterAzureBlobStorage() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 06f49dfa448..e33880f88e3 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -27,6 +27,7 @@ namespace ErrorCodes M(Bool, decrypt_files_from_encrypted_disks) \ M(Bool, deduplicate_files) \ M(Bool, allow_s3_native_copy) \ + M(Bool, allow_azure_native_copy) \ M(Bool, use_same_s3_credentials_for_base_backup) \ M(Bool, azure_attempt_to_create_container) \ M(Bool, read_from_filesystem_cache) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index eccf4e90ce7..a6c4d5d7181 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -44,6 +44,9 @@ struct BackupSettings /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) bool allow_s3_native_copy = true; + /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) + bool allow_azure_native_copy = true; + /// Whether base backup to S3 should inherit credentials from the BACKUP query. 
 bool use_same_s3_credentials_for_base_backup = false;
diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp
index 9057dc9d198..69d9c52ebd9 100644
--- a/src/Backups/BackupsWorker.cpp
+++ b/src/Backups/BackupsWorker.cpp
@@ -598,6 +598,7 @@ void BackupsWorker::doBackup(
         backup_create_params.backup_uuid = backup_settings.backup_uuid;
         backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
         backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
+        backup_create_params.allow_azure_native_copy = backup_settings.allow_azure_native_copy;
         backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup;
         backup_create_params.azure_attempt_to_create_container = backup_settings.azure_attempt_to_create_container;
         backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp
index 1b9545fc455..8b05965f472 100644
--- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp
+++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp
@@ -135,10 +135,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
 
         if (params.open_mode == IBackup::OpenMode::READ)
         {
-            auto reader = std::make_shared<BackupReaderAzureBlobStorage>(configuration,
-                                                                         params.read_settings,
-                                                                         params.write_settings,
-                                                                         params.context);
+            auto reader = std::make_shared<BackupReaderAzureBlobStorage>(
+                configuration,
+                params.allow_azure_native_copy,
+                params.read_settings,
+                params.write_settings,
+                params.context);
 
             return std::make_unique<BackupImpl>(
                 params.backup_info,
@@ -150,11 +152,13 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
         }
         else
         {
-            auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
-                                                                         params.read_settings,
-                                                                         params.write_settings,
-                                                                         params.context,
-                                                                         params.azure_attempt_to_create_container);
+            auto writer = std::make_shared<BackupWriterAzureBlobStorage>(
+                configuration,
+                params.allow_azure_native_copy,
+                params.read_settings,
+                params.write_settings,
+                params.context,
+                params.azure_attempt_to_create_container);
 
             return std::make_unique<BackupImpl>(
                 params.backup_info,
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
index 36225b13ee8..bee8e206ec4 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
@@ -107,11 +107,13 @@ AzureObjectStorage::AzureObjectStorage(
     const String & name_,
     AzureClientPtr && client_,
     SettingsPtr && settings_,
-    const String & object_namespace_)
+    const String & object_namespace_,
+    const String & description_)
     : name(name_)
     , client(std::move(client_))
     , settings(std::move(settings_))
     , object_namespace(object_namespace_)
+    , description(description_)
     , log(getLogger("AzureObjectStorage"))
 {
 }
@@ -409,7 +411,8 @@ std::unique_ptr<IObjectStorage> AzureObjectStorage::cloneObjectStorage(const std
         name,
         getAzureBlobContainerClient(config, config_prefix),
         getAzureBlobStorageSettings(config, config_prefix, context),
-        object_namespace
+        object_namespace,
+        description
     );
 }
 
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
index f52ab803012..3d94090bcc6 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
@@ -81,7 +81,8 @@ public:
         const String & name_,
         AzureClientPtr && client_,
         SettingsPtr && settings_,
-        const String & object_namespace_);
+        const String & object_namespace_,
+        const String & description_);
 
     void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
 
@@ -93,7 +94,7 @@ public:
 
     std::string getCommonKeyPrefix() const override { return ""; }
 
-    std::string getDescription() const override { return client.get()->GetUrl(); }
+    std::string getDescription() const override { return description; }
 
     bool exists(const StoredObject & object) const override;
 
@@ -172,6 +173,7 @@ private:
     MultiVersion<Azure::Storage::Blobs::BlobContainerClient> client;
     MultiVersion<AzureObjectStorageSettings> settings;
     const String object_namespace; /// container + prefix
+    const String description; /// url + container
 
     LoggerPtr log;
 };
diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp
index 761ff24e648..cddcea979b5 100644
--- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp
+++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp
@@ -306,11 +306,14 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory)
         bool /* skip_access_check */) -> ObjectStoragePtr
     {
         AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix);
+        std::string endpoint_string = endpoint.getEndpoint();
+
         return createObjectStorage<AzureObjectStorage>(
             ObjectStorageType::Azure, config, config_prefix, name,
             getAzureBlobContainerClient(config, config_prefix),
             getAzureBlobStorageSettings(config, config_prefix, context),
-            endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix);
+            endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix,
+            endpoint.prefix.empty() ? endpoint_string : endpoint_string.substr(0, endpoint_string.length() - endpoint.prefix.length()));
     };
     factory.registerObjectStorageType("azure_blob_storage", creator);
     factory.registerObjectStorageType("azure", creator);
diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp
index 769f1a184f6..dc46de1e07f 100644
--- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp
+++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp
@@ -289,6 +289,7 @@ void copyAzureBlobStorageFile(
 
     if (settings->use_native_copy)
     {
+        LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob);
         ProfileEvents::increment(ProfileEvents::AzureCopyObject);
         if (dest_client->GetClickhouseOptions().IsClientForDisk)
             ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject);
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index 9c551e82a99..0103fc0d2a2 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -302,8 +302,8 @@ void registerStorageAzureBlob(StorageFactory & factory)
         auto settings = StorageAzureBlob::createSettings(args.getContext());
 
         return std::make_shared<StorageAzureBlob>(
-            std::move(configuration),
-            std::make_unique<AzureObjectStorage>("AzureBlobStorage", std::move(client), std::move(settings),configuration.container),
+            configuration,
+            std::make_unique<AzureObjectStorage>("AzureBlobStorage", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()),
             args.getContext(),
             args.table_id,
             args.columns,
@@ -491,6 +491,12 @@ Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const
     return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl());
 }
 
+std::string StorageAzureBlob::Configuration::getConnectionURLWithContainer() const
+{
+    auto url = getConnectionURL();
+    return fs::path(url.toString()) / container;
+}
+
 bool StorageAzureBlob::Configuration::withGlobsIgnorePartitionWildcard() const
 {
     if (!withPartitionWildcard())
diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h
index b433cd92d68..7bce40bce26 100644
--- a/src/Storages/StorageAzureBlob.h
+++ b/src/Storages/StorageAzureBlob.h
@@ -45,6 +45,8 @@ public:
 
         Poco::URI getConnectionURL() const;
 
+        std::string getConnectionURLWithContainer() const;
+
         std::string connection_url;
         bool is_connection_string;
 
diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp
index 275cd2a9cbb..e73277b4d7b 100644
--- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp
+++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp
@@ -333,7 +333,7 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex
         auto client = StorageAzureBlob::createClient(configuration, !is_insert_query);
         auto settings = StorageAzureBlob::createSettings(context);
 
-        auto object_storage = std::make_unique<AzureObjectStorage>("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container);
+        auto object_storage = std::make_unique<AzureObjectStorage>("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer());
         if (configuration.format == "auto")
             return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first;
         return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context);
@@ -365,7 +365,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct
 
     StoragePtr storage = std::make_shared<StorageAzureBlob>(
         configuration,
-        std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
+        std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()),
        context,
        StorageID(getDatabaseName(), table_name),
        columns,
diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp
index 04dddca7672..dc65426a6e3 100644
--- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp
+++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp
@@ -39,7 +39,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
         /// On worker node this filename won't contains globs
         storage = std::make_shared<StorageAzureBlob>(
             configuration,
-            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
+            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()),
             context,
             StorageID(getDatabaseName(), table_name),
             columns,
@@ -54,7 +54,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
         storage = std::make_shared<StorageAzureBlobCluster>(
             cluster_name,
             configuration,
-            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container),
+            std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()),
             StorageID(getDatabaseName(), table_name),
             columns,
             ConstraintsDescription{},
diff --git a/tests/integration/test_azure_blob_storage_native_copy/__init__.py b/tests/integration/test_azure_blob_storage_native_copy/__init__.py
new file mode 100644
index 00000000000..e5a0d9b4834
--- /dev/null
+++ b/tests/integration/test_azure_blob_storage_native_copy/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_azure_blob_storage_native_copy/test.py b/tests/integration/test_azure_blob_storage_native_copy/test.py
new file mode 100644
index 00000000000..b16d9b4b5c4
--- /dev/null
+++ b/tests/integration/test_azure_blob_storage_native_copy/test.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+
+import gzip
+import json
+import logging
+import os
+import io
+import random
+import threading
+import time
+
+from azure.storage.blob import BlobServiceClient
+import helpers.client
+import pytest
+from helpers.cluster import ClickHouseCluster, ClickHouseInstance
+from helpers.network import PartitionManager
+from helpers.mock_servers import start_mock_servers
+from helpers.test_tools import exec_query_with_retry
+
+
+def generate_config(port):
+    path = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        "./_gen/storage_conf.xml",
+    )
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    with open(path, "w") as f:
+        TEMPLATE = """
+    <clickhouse>
+        <storage_configuration>
+            <disks>
+                <disk_azure>
+                    <metadata_type>local</metadata_type>
+                    <type>object_storage</type>
+                    <object_storage_type>azure_blob_storage</object_storage_type>
+                    <storage_account_url>http://azurite1:{port}/devstoreaccount1</storage_account_url>
+                    <container_name>cont</container_name>
+                    <skip_access_check>false</skip_access_check>
+                    <account_name>devstoreaccount1</account_name>
+                    <account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
+                    <use_native_copy>true</use_native_copy>
+                </disk_azure>
+                <disk_azure_other_bucket>
+                    <metadata_type>local</metadata_type>
+                    <type>object_storage</type>
+                    <object_storage_type>azure_blob_storage</object_storage_type>
+                    <use_native_copy>true</use_native_copy>
+                    <storage_account_url>http://azurite1:{port}/devstoreaccount1</storage_account_url>
+                    <container_name>othercontainer</container_name>
+                    <skip_access_check>false</skip_access_check>
+                    <account_name>devstoreaccount1</account_name>
+                    <account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
+                </disk_azure_other_bucket>
+                <disk_azure_cache>
+                    <type>cache</type>
+                    <disk>disk_azure</disk>
+                    <path>/tmp/azure_cache/</path>
+                    <max_size>1000000000</max_size>
+                    <cache_on_write_operations>1</cache_on_write_operations>
+                </disk_azure_cache>
+            </disks>
+            <policies>
+                <policy_azure>
+                    <volumes>
+                        <main>
+                            <disk>disk_azure</disk>
+                        </main>
+                    </volumes>
+                </policy_azure>
+                <policy_azure_other_bucket>
+                    <volumes>
+                        <main>
+                            <disk>disk_azure_other_bucket</disk>
+                        </main>
+                    </volumes>
+                </policy_azure_other_bucket>
+                <policy_azure_cache>
+                    <volumes>
+                        <main>
+                            <disk>disk_azure_cache</disk>
+                        </main>
+                    </volumes>
+                </policy_azure_cache>
+            </policies>
+        </storage_configuration>
+        <backups>
+            <allowed_disk>disk_azure</allowed_disk>
+            <allowed_disk>disk_azure_cache</allowed_disk>
+            <allowed_disk>disk_azure_other_bucket</allowed_disk>
+        </backups>
+    </clickhouse>
+ """ + f.write(TEMPLATE.format(port=port)) + return path + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + port = cluster.azurite_port + path = generate_config(port) + cluster.add_instance( + "node1", + main_configs=[path], + with_azurite=True, + ) + cluster.add_instance( + "node2", + main_configs=[path], + with_azurite=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def azure_query( + node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None +): + for i in range(try_num): + try: + if expect_error: + return node.query_and_get_error(query, settings=settings) + else: + return node.query(query, settings=settings) + except Exception as ex: + retriable_errors = [ + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Error while polling for socket ready read", + ] + retry = False + for error in retriable_errors: + if error in str(ex): + retry = True + print(f"Try num: {i}. 
Having retriable error: {ex}") + time.sleep(i) + break + if not retry or i == try_num - 1: + raise Exception(ex) + if query_on_retry is not None: + node.query(query_on_retry) + continue + + +def test_backup_restore_on_merge_tree_same_container(cluster): + node1 = cluster.instances["node1"] + azure_query( + node1, + f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_cache'", + ) + azure_query(node1, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')") + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_backup')" + print("BACKUP DEST", backup_destination) + azure_query( + node1, + f"BACKUP TABLE test_simple_merge_tree TO {backup_destination}", + ) + + assert node1.contains_in_log("using native copy") + + azure_query( + node1, + f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", + ) + assert ( + azure_query(node1, f"SELECT * from test_simple_merge_tree_restored") == "1\ta\n" + ) + azure_query(node1, f"DROP TABLE test_simple_merge_tree") + azure_query(node1, f"DROP TABLE test_simple_merge_tree_restored") + + +def test_backup_restore_on_merge_tree_different_container(cluster): + node2 = cluster.instances["node2"] + azure_query( + node2, + f"CREATE TABLE test_simple_merge_tree_different_bucket(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_other_bucket'", + ) + azure_query( + node2, f"INSERT INTO test_simple_merge_tree_different_bucket VALUES (1, 'a')" + ) + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_different_bucket_backup_different_bucket')" + print("BACKUP DEST", backup_destination) + azure_query( + node2, + f"BACKUP TABLE test_simple_merge_tree_different_bucket TO {backup_destination}", + ) + + assert not node2.contains_in_log("using native copy") + + azure_query( + node2, + f"RESTORE TABLE test_simple_merge_tree_different_bucket AS test_simple_merge_tree_different_bucket_restored FROM {backup_destination};", + ) + assert ( + azure_query( + node2, f"SELECT * from test_simple_merge_tree_different_bucket_restored" + ) + == "1\ta\n" + ) + + assert not node2.contains_in_log("using native copy") + + azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket") + azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket_restored") From 93b11c57a08504c60d57806955cae0764f95c933 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 19 May 2024 21:00:08 +0200 Subject: [PATCH 411/651] Fix azure backup uploaded block size --- .../copyAzureBlobStorageFile.cpp | 40 +++++++---- src/Storages/StorageAzureBlob.cpp | 1 + .../test.py | 71 +++++++++++++++++++ 3 files changed, 99 insertions(+), 13 deletions(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 769f1a184f6..f1e0a906971 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -219,19 +219,33 @@ namespace auto block_blob_client = client->GetBlockBlobClient(dest_blob); auto read_buffer = std::make_unique(create_read_buffer(), task.part_offset, task.part_size); - while (!read_buffer->eof()) + + const size_t strict_upload_part_size = settings->strict_upload_part_size + ? 
settings->strict_upload_part_size
+            : settings->max_upload_part_size;
+        size_t size_to_stage = task.part_size;
+
+        PODArray<char> memory;
+        memory.resize(std::min(size_to_stage, strict_upload_part_size));
+        /// FIXME: it will be better to preallocate the memory and reuse it for each processUploadPartRequest.
+        WriteBufferFromVector<PODArray<char>> wb(memory);
+
+        while (size_to_stage)
         {
-            auto size = read_buffer->available();
-            if (size > 0)
-            {
-                auto block_id = getRandomASCIIString(64);
-                Azure::Core::IO::MemoryBodyStream memory(reinterpret_cast<const uint8_t *>(read_buffer->position()), size);
-                block_blob_client.StageBlock(block_id, memory);
-                task.block_ids.emplace_back(block_id);
-                read_buffer->ignore(size);
-                LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, block_id: {}", dest_container_for_logging, dest_blob, block_id);
-            }
+            size_t size = std::min(size_to_stage, strict_upload_part_size);
+            wb.position() = wb.buffer().begin();
+            copyData(*read_buffer, wb, size);
+            size_to_stage -= size;
+
+            Azure::Core::IO::MemoryBodyStream stream(reinterpret_cast<const uint8_t *>(memory.data()), size);
+
+            const auto & block_id = task.block_ids.emplace_back(getRandomASCIIString(64));
+            block_blob_client.StageBlock(block_id, stream);
+
+            LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, block_id: {}, size: {} (strict part upload size: {})",
+                dest_container_for_logging, dest_blob, block_id, size, strict_upload_part_size);
         }
+
         std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race
         LOG_TRACE(log, "Writing part finished. Container: {}, Blob: {}, Parts: {}", dest_container_for_logging, dest_blob, bg_tasks.size());
     }
@@ -321,8 +335,8 @@ void copyAzureBlobStorageFile(
         LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob);
         auto create_read_buffer = [&]
         {
-            return std::make_unique<ReadBufferFromAzureBlobStorage>(src_client, src_blob, read_settings, settings->max_single_read_retries,
-                settings->max_single_download_retries);
+            return std::make_unique<ReadBufferFromAzureBlobStorage>(
+                src_client, src_blob, read_settings, settings->max_single_read_retries, settings->max_single_download_retries);
         };
 
         UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyAzureBlobStorageFile")};
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index 9c551e82a99..69db97c6cec 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -254,6 +254,7 @@ AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPt
     auto settings_ptr = std::make_unique<AzureObjectStorageSettings>();
     settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size;
     settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries;
+    settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size;
     settings_ptr->list_object_keys_size = static_cast<int32_t>(context_settings.azure_list_object_keys_size);
 
     return settings_ptr;
diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py
index 1a1458cb68e..3808bb100d9 100644
--- a/tests/integration/test_backup_restore_azure_blob_storage/test.py
+++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py
@@ -299,3 +299,74 @@ def test_backup_restore_on_merge_tree(cluster):
     )
     azure_query(node, f"DROP TABLE test_simple_merge_tree")
     azure_query(node, f"DROP TABLE test_simple_merge_tree_restored")
+
+
+def
test_backup_restore_correct_block_ids(cluster): + node = cluster.instances["node"] + azure_query( + node, + f""" + CREATE TABLE test_simple_merge_tree(key UInt64, data String) + Engine = MergeTree() + ORDER BY tuple() + SETTINGS storage_policy='blob_storage_policy'""", + ) + data_query = "SELECT number, repeat('a', 100) FROM numbers(1000)" + azure_query( + node, + f"INSERT INTO test_simple_merge_tree {data_query}", + ) + + strict_upload_part_size = 42 + data_path = "test_backup_correct_block_ids" + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{data_path}')" + azure_query( + node, + f""" + SET azure_strict_upload_part_size = {strict_upload_part_size}; + BACKUP TABLE test_simple_merge_tree TO {backup_destination}; + """, + ) + + port = cluster.azurite_port + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + container_name = "cont" + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + container_client = blob_service_client.get_container_client(container_name) + blobs = container_client.list_blobs() + + data_blob = f"{data_path}/data/default/test_simple_merge_tree/all_1_1_0/data.bin" + found = False + for blob in blobs: + if data_blob == blob.get("name"): + found = True + break + assert found + + blob_client = blob_service_client.get_blob_client( + blob=data_blob, container=container_name + ) + + blocks_num = len(blob_client.get_block_list()[0]) + assert blocks_num > 100 + + count = 0 + for block in blob_client.get_block_list()[0]: + count += 1 + if count < blocks_num: + assert block.get("size") == strict_upload_part_size + else: + assert block.get("size") < strict_upload_part_size + + azure_query( + node, + f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", + ) + assert azure_query( + node, f"SELECT * from test_simple_merge_tree_restored ORDER BY key" + ) == node.query(data_query) From e8d66bf4d79d4ee1f3b18a4ccb1865f3f7ce7294 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 19:16:24 +0000 Subject: [PATCH 412/651] Cosmetics, pt. 
III

---
 src/Functions/serial.cpp | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp
index 3da2f4ce218..de3036ad242 100644
--- a/src/Functions/serial.cpp
+++ b/src/Functions/serial.cpp
@@ -17,16 +17,16 @@ namespace ErrorCodes
 class FunctionSerial : public IFunction
 {
 private:
-    mutable zkutil::ZooKeeperPtr zk{nullptr};
+    mutable zkutil::ZooKeeperPtr zk;
     ContextPtr context;
 
 public:
     static constexpr auto name = "serial";
 
-    explicit FunctionSerial(ContextPtr ctx) : context(ctx)
+    explicit FunctionSerial(ContextPtr context_) : context(context_)
     {
-        if (ctx->hasZooKeeper()) {
-            zk = ctx->getZooKeeper();
+        if (context->hasZooKeeper()) {
+            zk = context->getZooKeeper();
         }
     }
 
@@ -37,7 +37,6 @@ public:
     String getName() const override { return name; }
 
     size_t getNumberOfArguments() const override { return 1; }
-
     bool isStateful() const override { return true; }
     bool isDeterministic() const override { return false; }
     bool isDeterministicInScopeOfQuery() const override { return false; }
@@ -74,14 +73,14 @@ public:
 
         auto col_res = ColumnVector<Int64>::create();
         typename ColumnVector<Int64>::Container & vec_to = col_res->getData();
-        size_t size = input_rows_count;
-        vec_to.resize(size);
+
+        vec_to.resize(input_rows_count);
 
         const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString();
 
-        // CAS in ZooKeeper
-        // `get` value and version, `trySet` new with version check
-        // I didn't get how to do it with `multi`
+        /// CAS in ZooKeeper
+        /// `get` value and version, `trySet` new with version check
+        /// I didn't get how to do it with `multi`
         Int64 counter;
         std::string counter_path = serial_path + "/counter";
 
@@ -93,10 +92,10 @@ public:
         Coordination::Stat stat;
         while (true)
         {
-            std::string counter_string = zk->get(counter_path, &stat);
+            const String counter_string = zk->get(counter_path, &stat);
             counter = std::stoll(counter_string);
-            std::string updated_counter = std::to_string(counter + input_rows_count);
-            Coordination::Error err = zk->trySet(counter_path, updated_counter);
+            String updated_counter = std::to_string(counter + input_rows_count);
+            const Coordination::Error err = zk->trySet(counter_path, updated_counter);
             if (err == Coordination::Error::ZOK)
             {
                 // CAS is done
@@ -111,7 +110,7 @@ public:
         }
 
         // Make a result
-        for (auto& val : vec_to)
+        for (auto & val : vec_to)
         {
             val = counter;
             ++counter;
@@ -137,16 +136,16 @@ The server should be configured with a ZooKeeper.
}, .returned_value = "Sequential numbers of type Int64 starting from the previous counter value", .examples{ - {"first call", "SELECT serial('name')", R"( -┌─serial('name')─┐ + {"first call", "SELECT serial('id1')", R"( +┌─serial('id1')──┐ │ 1 │ └────────────────┘)"}, - {"second call", "SELECT serial('name')", R"( -┌─serial('name')─┐ + {"second call", "SELECT serial('id1')", R"( +┌─serial('id1')──┐ │ 2 │ └────────────────┘)"}, - {"column call", "SELECT *, serial('name') FROM test_table", R"( -┌─CounterID─┬─UserID─┬─ver─┬─serial('name')─┐ + {"column call", "SELECT *, serial('id1') FROM test_table", R"( +┌─CounterID─┬─UserID─┬─ver─┬─serial('id1')──┐ │ 1 │ 3 │ 3 │ 3 │ │ 1 │ 1 │ 1 │ 4 │ │ 1 │ 2 │ 2 │ 5 │ From c2a613e94d40c1283eef2506554a467684b1c4b0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 19 May 2024 22:08:36 +0200 Subject: [PATCH 413/651] Better --- src/Core/Settings.h | 1 + .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 1 + .../AzureBlobStorage/AzureObjectStorage.h | 1 + .../copyAzureBlobStorageFile.cpp | 82 +++++++++++++------ src/Storages/StorageAzureBlob.cpp | 3 + .../test.py | 8 +- 6 files changed, 67 insertions(+), 29 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c555b5cb208..21af27cc60b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -80,6 +80,7 @@ class IColumn; M(UInt64, connections_with_failover_max_tries, 3, "The maximum number of attempts to connect to replicas.", 0) \ M(UInt64, s3_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to S3 (some implementations does not supports variable size parts).", 0) \ M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \ + M(UInt64, azure_max_blocks_in_multipart_upload, 50000, "Maximum number of blocks in multipart upload for Azure.", 0) \ M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \ diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index a535b007541..bae58f0b9c6 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -257,6 +257,7 @@ std::unique_ptr getAzureBlobStorageSettings(const Po settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", context->getSettings().azure_max_upload_part_size); settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size); settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); + settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", 50000); settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries); settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file); 
settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index f52ab803012..7ec36640772 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -63,6 +63,7 @@ struct AzureObjectStorageSettings bool use_native_copy = false; size_t max_unexpected_write_error_retries = 4; size_t max_inflight_parts_for_one_file = 20; + size_t max_blocks_in_multipart_upload = 50000; size_t strict_upload_part_size = 0; size_t upload_part_size_multiply_factor = 2; size_t upload_part_size_multiply_parts_count_threshold = 500; diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index f1e0a906971..258ebda7fce 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -94,11 +94,56 @@ namespace void calculatePartSize() { - auto max_upload_part_size = settings->max_upload_part_size; - if (!max_upload_part_size) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be 0"); + if (!total_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. This must not happen"); + + auto max_part_number = settings->max_blocks_in_multipart_upload; + const auto min_upload_part_size = settings->min_upload_part_size; + const auto max_upload_part_size = settings->max_upload_part_size; + + if (!max_part_number) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_blocks_in_multipart_upload must not be 0"); + else if (!min_upload_part_size) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "min_upload_part_size must not be 0"); + else if (max_upload_part_size < min_upload_part_size) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be less than min_upload_part_size"); + + size_t part_size = min_upload_part_size; + auto num_parts = (total_size + part_size - 1) / part_size; + + if (num_parts > max_part_number) + { + part_size = (total_size + max_part_number - 1) / max_part_number; + num_parts = (total_size + part_size - 1) / part_size; + } + + if (part_size > max_upload_part_size) + { + part_size = max_upload_part_size; + num_parts = (total_size + part_size - 1) / part_size; + } + + if (num_parts < 1 || num_parts > max_part_number || part_size < min_upload_part_size || part_size > max_upload_part_size) + { + String msg; + if (num_parts < 1) + msg = "Number of parts is zero"; + else if (num_parts > max_part_number) + msg = fmt::format("Number of parts exceeds {}", num_parts, max_part_number); + else if (part_size < min_upload_part_size) + msg = fmt::format("Size of a part is less than {}", part_size, min_upload_part_size); + else + msg = fmt::format("Size of a part exceeds {}", part_size, max_upload_part_size); + + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, + "{} while writing {} bytes to Azure. Check max_part_number = {}, " + "min_upload_part_size = {}, max_upload_part_size = {}", + msg, total_size, max_part_number, min_upload_part_size, max_upload_part_size); + } + /// We've calculated the size of a normal part (the final part can be smaller). 
-        normal_part_size = max_upload_part_size;
+        normal_part_size = part_size;
     }
 
 public:
@@ -220,34 +265,21 @@ namespace
             auto block_blob_client = client->GetBlockBlobClient(dest_blob);
             auto read_buffer = std::make_unique<LimitSeekableReadBuffer>(create_read_buffer(), task.part_offset, task.part_size);
 
-            const size_t strict_upload_part_size = settings->strict_upload_part_size
-                ? settings->strict_upload_part_size
-                : settings->max_upload_part_size;
+            /// task.part_size is already normalized according to min_upload_part_size and max_upload_part_size.
             size_t size_to_stage = task.part_size;
 
-            PODArray<char> memory;
-            memory.resize(std::min(size_to_stage, strict_upload_part_size));
-            /// FIXME: it will be better to preallocate the memory and reuse it for each processUploadPartRequest.
+            PODArray<char> memory;
+            memory.resize(size_to_stage);
             WriteBufferFromVector<PODArray<char>> wb(memory);
 
-            while (size_to_stage)
-            {
-                size_t size = std::min(size_to_stage, strict_upload_part_size);
-                wb.position() = wb.buffer().begin();
-                copyData(*read_buffer, wb, size);
-                size_to_stage -= size;
+            copyData(*read_buffer, wb, size_to_stage);
+            Azure::Core::IO::MemoryBodyStream stream(reinterpret_cast<const uint8_t *>(memory.data()), size_to_stage);
 
-                Azure::Core::IO::MemoryBodyStream stream(reinterpret_cast<const uint8_t *>(memory.data()), size);
+            const auto & block_id = task.block_ids.emplace_back(getRandomASCIIString(64));
+            block_blob_client.StageBlock(block_id, stream);
 
-                const auto & block_id = task.block_ids.emplace_back(getRandomASCIIString(64));
-                block_blob_client.StageBlock(block_id, stream);
-
-                LOG_TRACE(log, "Writing part. Container: {}, Blob: {}, block_id: {}, size: {} (strict part upload size: {})",
-                    dest_container_for_logging, dest_blob, block_id, size, strict_upload_part_size);
-            }
+            LOG_TRACE(log, "Writing part.
Container: {}, Blob: {}, block_id: {}, size: {}", + dest_container_for_logging, dest_blob, block_id, size_to_stage); } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 69db97c6cec..c6ae805005d 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -255,6 +255,9 @@ AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(const ContextPt settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; + settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; + settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; + settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); return settings_ptr; diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 3808bb100d9..7b4a38d12b2 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -317,14 +317,14 @@ def test_backup_restore_correct_block_ids(cluster): f"INSERT INTO test_simple_merge_tree {data_query}", ) - strict_upload_part_size = 42 + upload_part_size = 42 data_path = "test_backup_correct_block_ids" backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{data_path}')" azure_query( node, f""" - SET azure_strict_upload_part_size = {strict_upload_part_size}; + SET azure_min_upload_part_size = {upload_part_size}; BACKUP TABLE test_simple_merge_tree TO {backup_destination}; """, ) @@ -359,9 +359,9 @@ def test_backup_restore_correct_block_ids(cluster): for block in blob_client.get_block_list()[0]: count += 1 if count < blocks_num: - assert block.get("size") == strict_upload_part_size + assert block.get("size") == upload_part_size else: - assert block.get("size") < strict_upload_part_size + assert block.get("size") < upload_part_size azure_query( node, From ea774b673a6918941edc84b1df47b2a1a175766f Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 19 May 2024 22:25:56 +0200 Subject: [PATCH 414/651] Better test --- .../test.py | 102 ++++++++++-------- 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 7b4a38d12b2..07ef305a0be 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -317,56 +317,66 @@ def test_backup_restore_correct_block_ids(cluster): f"INSERT INTO test_simple_merge_tree {data_query}", ) - upload_part_size = 42 - data_path = "test_backup_correct_block_ids" + for min_upload_size, max_upload_size, max_blocks, expected_block_size in [ + (42, 100, 1000, 42), + (42, 52, 86, 52), + ]: + data_path = f"test_backup_correct_block_ids_{max_blocks}" - backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{data_path}')" - azure_query( - node, - f""" - SET azure_min_upload_part_size = {upload_part_size}; - BACKUP TABLE test_simple_merge_tree TO 
{backup_destination}; - """, - ) + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{data_path}')" + azure_query( + node, + f""" + SET azure_min_upload_part_size = {min_upload_size}; + SET azure_max_upload_part_size = {max_upload_size}; + SET azure_max_blocks_in_multipart_upload = {max_blocks}; + BACKUP TABLE test_simple_merge_tree TO {backup_destination}; + """, + ) - port = cluster.azurite_port - connection_string = ( - f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" - f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" - f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" - ) - container_name = "cont" - blob_service_client = BlobServiceClient.from_connection_string(connection_string) - container_client = blob_service_client.get_container_client(container_name) - blobs = container_client.list_blobs() + port = cluster.azurite_port + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + container_name = "cont" + blob_service_client = BlobServiceClient.from_connection_string( + connection_string + ) + container_client = blob_service_client.get_container_client(container_name) + blobs = container_client.list_blobs() - data_blob = f"{data_path}/data/default/test_simple_merge_tree/all_1_1_0/data.bin" - found = False - for blob in blobs: - if data_blob == blob.get("name"): - found = True - break - assert found + data_blob = ( + f"{data_path}/data/default/test_simple_merge_tree/all_1_1_0/data.bin" + ) + found = False + for blob in blobs: + if data_blob == blob.get("name"): + found = True + break + assert found - blob_client = blob_service_client.get_blob_client( - blob=data_blob, container=container_name - ) + blob_client = blob_service_client.get_blob_client( + blob=data_blob, container=container_name + ) - blocks_num = len(blob_client.get_block_list()[0]) - assert blocks_num > 100 + blocks_num = len(blob_client.get_block_list()[0]) + assert blocks_num > 50 - count = 0 - for block in blob_client.get_block_list()[0]: - count += 1 - if count < blocks_num: - assert block.get("size") == upload_part_size - else: - assert block.get("size") < upload_part_size + count = 0 + for block in blob_client.get_block_list()[0]: + count += 1 + if count < blocks_num: + assert block.get("size") == expected_block_size + else: + assert block.get("size") < expected_block_size - azure_query( - node, - f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", - ) - assert azure_query( - node, f"SELECT * from test_simple_merge_tree_restored ORDER BY key" - ) == node.query(data_query) + azure_query( + node, + f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored_{max_blocks} FROM {backup_destination};", + ) + assert azure_query( + node, + f"SELECT * from test_simple_merge_tree_restored_{max_blocks} ORDER BY key", + ) == node.query(data_query) From 64a308013f6d0075fcf9d7c90d7e50cd9a3ae19e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 18 May 2024 12:54:23 +0200 Subject: [PATCH 415/651] Tune mmap_rnd_bits to workaround sanitizers issues v1: vm.mmap_rnd_bits=28 v2: rebase with clang 18.1.6 + kernel.randomize_va_space=0 v3: leave only vm.mmap_rnd_bits=28 + use pre-run.sh (hope that it will be used), that way docker will not 
require --privileged and for some reason this breaks ASAN (though I cannot reproduce it)
v4: use actions/common_setup over init_runner.sh (it requires some manual deploy)
---
 .github/actions/common_setup/action.yml | 7 +++++++
 tests/ci/worker/prepare-ci-ami.sh       | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/.github/actions/common_setup/action.yml b/.github/actions/common_setup/action.yml
index e492fa97816..b9299c64e72 100644
--- a/.github/actions/common_setup/action.yml
+++ b/.github/actions/common_setup/action.yml
@@ -28,3 +28,10 @@ runs:
       run: |
         # to remove every leftovers
         sudo rm -fr "$TEMP_PATH" && mkdir -p "$TEMP_PATH"
+    - name: Tune vm.mmap_rnd_bits for sanitizers
+      shell: bash
+      run: |
+        sudo sysctl vm.mmap_rnd_bits
+        # https://github.com/google/sanitizers/issues/856
+        echo "Tune vm.mmap_rnd_bits for sanitizers"
+        sudo sysctl vm.mmap_rnd_bits=28
diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh
index 92e97865b18..3e2f33c89d1 100644
--- a/tests/ci/worker/prepare-ci-ami.sh
+++ b/tests/ci/worker/prepare-ci-ami.sh
@@ -91,6 +91,8 @@ apt-get install --yes --no-install-recommends azure-cli
 
 # Increase the limit on number of virtual memory mappings to aviod 'Cannot mmap' error
 echo "vm.max_map_count = 2097152" > /etc/sysctl.d/01-increase-map-counts.conf
+# Workaround for sanitizers' incompatibility with some kernels, see https://github.com/google/sanitizers/issues/856
+echo "vm.mmap_rnd_bits=28" > /etc/sysctl.d/02-vm-mmap_rnd_bits.conf
 
 systemctl restart docker

From 08091006aa66ae8b01b56b83ed9fe74482c2672d Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Sun, 19 May 2024 20:36:08 +0000
Subject: [PATCH 416/651] Ignore exception when checking for cgroupsv2

---
 base/base/cgroupsv2.cpp | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp
index bea2e99fa51..5c5ab150564 100644
--- a/base/base/cgroupsv2.cpp
+++ b/base/base/cgroupsv2.cpp
@@ -9,11 +9,18 @@ bool cgroupsV2Enabled()
 {
 #if defined(OS_LINUX)
-    /// This file exists iff the host has cgroups v2 enabled.
-    auto controllers_file = default_cgroups_mount / "cgroup.controllers";
-    if (!std::filesystem::exists(controllers_file))
-        return false;
-    return true;
+    try
+    {
+        /// This file exists iff the host has cgroups v2 enabled.
+        auto controllers_file = default_cgroups_mount / "cgroup.controllers";
+        if (!std::filesystem::exists(controllers_file))
+            return false;
+        return true;
+    }
+    catch (...)
+    {
+        return false; /// e.g.
permission denied exception + } #else return false; #endif From 357ec69677956a5ba4fb4242952a695b3565f03e Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 19 May 2024 23:33:24 +0200 Subject: [PATCH 417/651] Fix style check --- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 258ebda7fce..178eb398e24 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes { extern const int INVALID_CONFIG_PARAMETER; extern const int AZURE_BLOB_STORAGE_ERROR; + extern const int LOGICAL_ERROR; } namespace From 5d848aa32f1127098895cc29ad3200b5b325768a Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 19 May 2024 23:20:40 +0800 Subject: [PATCH 418/651] update comment of method visitNullableBySteps, try to suppress clang-18 tidy warnings Change-Id: I3119c44dc764caed0dc471f52ac5e634c75c7b50 --- .../Impl/Parquet/ParquetDataValuesReader.cpp | 14 +++++++++++--- .../Formats/Impl/Parquet/ParquetDataValuesReader.h | 13 +++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 65f569ec264..b8e4db8700c 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -14,6 +14,17 @@ namespace ErrorCodes extern const int PARQUET_EXCEPTION; } +RleValuesReader::RleValuesReader( + std::unique_ptr bit_reader_, Int32 bit_width_) + : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) +{ + if (unlikely(bit_width >= 64)) + { + // e.g. in GetValue_ in bit_stream_utils.h, uint64 type is used to read bit values + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "unsupported bit width {}", bit_width); + } +} + void RleValuesReader::nextGroup() { // refer to: @@ -29,9 +40,6 @@ void RleValuesReader::nextGroup() { cur_group_size *= 8; cur_packed_bit_values.resize(cur_group_size); - - // try to suppress clang tidy warnings by assertion - assert(bit_width < 64); bit_reader->GetBatch(bit_width, cur_packed_bit_values.data(), cur_group_size); } else diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 0f916ff862d..75adb55df7e 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -18,8 +18,7 @@ namespace DB class RleValuesReader { public: - RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_) - : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) {} + RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_); /** * @brief Used when the bit_width is 0, so all elements have same value. @@ -71,12 +70,14 @@ public: * @tparam IndividualNullVisitor A callback with signature: void(size_t cursor), used to process null value * @tparam SteppedValidVisitor A callback with signature: * void(size_t cursor, const std::vector & valid_index_steps) - * for n valid elements with null value interleaved in a BitPacked group, + * valid_index_steps records the gap size between two valid elements, * i-th item in valid_index_steps describes how many elements there are * from i-th valid element (include) to (i+1)-th valid element (exclude). 
* - * take following BitPacked group with 2 valid elements for example: - * null valid null null valid null + * take following BitPacked group values for example, and assuming max_def_level is 1: + * [1, 0, 1, 1, 0, 1 ] + * null valid null null valid null + * the second line shows the corresponding validation state, * then the valid_index_steps has values [1, 3, 2]. * Please note that the the sum of valid_index_steps is same as elements number in this group. * @@ -117,7 +118,7 @@ private: std::vector cur_packed_bit_values; std::vector valid_index_steps; - Int32 bit_width; + const Int32 bit_width; UInt32 cur_group_size = 0; UInt32 cur_group_cursor = 0; From 63f31d3c1e2f4f562aebfe1c7a7cd26abad5ab1b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 06:13:06 +0200 Subject: [PATCH 419/651] Add retries in `git submodule update` --- docker/test/fasttest/run.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index bc7ffd1c2ef..4d5159cfa9e 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -160,10 +160,17 @@ function clone_submodules git submodule sync git submodule init - # --jobs does not work as fast as real parallel running - printf '%s\0' "${SUBMODULES_TO_UPDATE[@]}" | \ - xargs --max-procs=100 --null --no-run-if-empty --max-args=1 \ - git submodule update --depth 1 --single-branch + + # Network is unreliable + for _ in {1..10} + do + # --jobs does not work as fast as real parallel running + printf '%s\0' "${SUBMODULES_TO_UPDATE[@]}" | \ + xargs --max-procs=100 --null --no-run-if-empty --max-args=1 \ + git submodule update --depth 1 --single-branch && break + sleep 1 + done + git submodule foreach git reset --hard git submodule foreach git checkout @ -f git submodule foreach git clean -xfd From 9fefece70b624afcd79e5c016abab3a7f20f0922 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 08:37:48 +0200 Subject: [PATCH 420/651] Fix tidy --- src/Common/ThreadStatus.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index f2930513280..6ac4feac814 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -23,12 +23,8 @@ thread_local ThreadStatus constinit * current_thread = nullptr; namespace { -#if defined(__aarch64__) /// For aarch64 16K is not enough (likely due to tons of registers) -static constexpr size_t UNWIND_MINSIGSTKSZ = 32 << 10; -#else -static constexpr size_t UNWIND_MINSIGSTKSZ = 16 << 10; -#endif +constexpr size_t UNWIND_MINSIGSTKSZ = 32 << 10; /// Alternative stack for signal handling. /// From ea303e0d6346cd1bdd96c87ef767856db9425133 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 09:54:20 +0300 Subject: [PATCH 421/651] HDFS is unsupported --- docs/en/operations/storing-data.md | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 7005783dd60..59de4989941 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -7,27 +7,27 @@ title: "External Disks for Storing Data" Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. Various storages are supported: 1. 
[Amazon S3](https://aws.amazon.com/s3/) object storage. -2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) -3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). +2. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). +3. Unsupported: The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) :::note ClickHouse also has support for external table engines, which are different from external storage option described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. 1. to work with data stored on `Amazon S3` disks, use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine. -2. to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. -3. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. +2. to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. +3. Unsupported: to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. ::: ## Configuring external storage {#configuring-external-storage} -[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. +[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` (unsupported) using a disk with types `s3`, `azure_blob_storage`, `hdfs` (unsupported) accordingly. Disk configuration requires: -1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`. +1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs` (unsupported), `local_blob_storage`, `web`. 2. Configuration of a specific external storage type. Starting from 24.1 clickhouse version, it is possible to use a new configuration option. It requires to specify: 1. `type` equal to `object_storage` -2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`. +2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs` (unsupported), `local_blob_storage` (or just `local` from `24.3`), `web`. Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web` and, starting from `24.4`, `plain_rewritable`. Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata files contains mapping to files in object storage and some additional meta information about them). 
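To make the `local` metadata type described above more concrete, the sketch below models such a metadata file as a mapping from one logical path to the remote objects that back it. This is a hedged illustration only: the struct and field names (`ObjectRef`, `LocalMetadataFile`, `remote_key`) are invented for this example and do not reflect ClickHouse's actual on-disk metadata format.

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// One remote blob referenced by a local metadata file (hypothetical layout).
struct ObjectRef
{
    std::string remote_key;       // key of the blob in object storage
    std::uint64_t size_bytes = 0; // size of that blob
};

// A local metadata file: tiny on disk, while the payload lives remotely.
struct LocalMetadataFile
{
    std::string logical_path;       // the path the table engine sees
    std::vector<ObjectRef> objects; // remote blobs backing that path

    void dump(std::ostream & out) const
    {
        out << logical_path << '\n';
        for (const auto & o : objects)
            out << "  -> " << o.remote_key << " (" << o.size_bytes << " bytes)\n";
    }
};

int main()
{
    LocalMetadataFile meta{"store/abc/data.bin", {{"bucket/xyz/r0000001", 1048576}}};
    meta.dump(std::cout); // the local file is small; the data lives remotely
}
```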
@@ -328,7 +328,7 @@ Configuration: ``` -Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using `plain` metadata type. +Starting from `24.1` it is possible configure any object storage disk (`s3`, `azure`, `hdfs` (unsupported), `local`) using `plain` metadata type. Configuration: ``` xml @@ -428,12 +428,14 @@ Examples of working configurations can be found in integration tests directory ( Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: -## Using HDFS storage {#hdfs-storage} +## Using HDFS storage (Unsupported) In this sample configuration: -- the disk is of type `hdfs` +- the disk is of type `hdfs` (unsupported) - the data is hosted at `hdfs://hdfs1:9000/clickhouse/` +By the way, HDFS is unsupported and won't work. + ```xml @@ -464,9 +466,11 @@ In this sample configuration: ``` +Keep in mind that HDFS does not work at all. + ### Using Data Encryption {#encrypted-virtual-file-system} -You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. +You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) (unsupported) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. Example of disk configuration: @@ -529,7 +533,7 @@ Example of disk configuration: It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. -For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS. +For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS (unsupported). Cache uses `LRU` cache policy. @@ -971,7 +975,7 @@ Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#htt ### Zero-copy Replication (not ready for production) {#zero-copy} -Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. +Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` (unsupported) disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. 
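As a hedged aside on the zero-copy idea above: under zero-copy replication, "replicating a part" amounts to shipping a small metadata record (the remote paths), after which both replicas reference the same shared objects. The types below are invented for illustration and are not the `ReplicatedMergeTree` implementation.

```cpp
#include <iostream>
#include <string>
#include <vector>

// Hypothetical metadata for a data part stored in shared remote storage.
struct PartMetadata
{
    std::string part_name;
    std::vector<std::string> remote_paths; // where the bytes actually live
};

// Zero-copy "replication": copy only the metadata; no part data is moved.
PartMetadata replicateToPeer(const PartMetadata & local)
{
    return local; // the peer now points at the same remote objects
}

int main()
{
    PartMetadata part{"all_1_1_0", {"remote://bucket/clickhouse/all_1_1_0/data.bin"}};
    PartMetadata peer = replicateToPeer(part);
    std::cout << "peer references " << peer.remote_paths.front() << '\n';
}
```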
:::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. From f7c9fa696f91c306550ed3435bb51999983903e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 09:45:23 +0200 Subject: [PATCH 422/651] Fix UBSan error in negative positional arguments --- src/Interpreters/replaceForPositionalArguments.cpp | 2 +- .../03157_negative_positional_arguments_ubsan.reference | 0 .../0_stateless/03157_negative_positional_arguments_ubsan.sql | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.reference create mode 100644 tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index cceb0650fcd..3d60723a167 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -44,7 +44,7 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel pos = value; else { - if (static_cast(std::abs(value)) > columns.size()) + if (value < -static_cast(columns.size())) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Negative positional argument number {} is out of bounds. Expected in range [-{}, -1]", diff --git a/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.reference b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql new file mode 100644 index 00000000000..ddf5185c945 --- /dev/null +++ b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql @@ -0,0 +1 @@ +SELECT 1 GROUP BY -9223372036854775808; -- { serverError BAD_ARGUMENTS } From 95d680e20423490e74bed25bbd865a6d033dbee8 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 20 May 2024 10:09:57 +0200 Subject: [PATCH 423/651] Remove useless test --- tests/queries/0_stateless/00694_max_block_size_zero.reference | 0 tests/queries/0_stateless/00694_max_block_size_zero.sql | 4 ---- 2 files changed, 4 deletions(-) delete mode 100644 tests/queries/0_stateless/00694_max_block_size_zero.reference delete mode 100644 tests/queries/0_stateless/00694_max_block_size_zero.sql diff --git a/tests/queries/0_stateless/00694_max_block_size_zero.reference b/tests/queries/0_stateless/00694_max_block_size_zero.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/00694_max_block_size_zero.sql b/tests/queries/0_stateless/00694_max_block_size_zero.sql deleted file mode 100644 index ba5b513bb5d..00000000000 --- a/tests/queries/0_stateless/00694_max_block_size_zero.sql +++ /dev/null @@ -1,4 +0,0 @@ -SET send_logs_level = 'fatal'; - -SET max_block_size = 0; -SELECT number FROM system.numbers; -- { serverError 12 } From 1525ca4cf02479e53ebb670720ea385ddb7670a1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 May 2024 12:09:47 +0200 Subject: [PATCH 424/651] Disable pretty format restrictions when stdout is not TTY Signed-off-by: Azat Khuzhin --- src/Client/ClientBase.cpp | 4 ++-- .../queries/0_stateless/03160_pretty_format_tty.reference | 1 + tests/queries/0_stateless/03160_pretty_format_tty.sh | 8 ++++++++ 3 
files changed, 11 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03160_pretty_format_tty.reference create mode 100755 tests/queries/0_stateless/03160_pretty_format_tty.sh
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 67aba2256e8..4441d884754 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -710,8 +710,8 @@ void ClientBase::adjustSettings() settings.input_format_values_allow_data_after_semicolon.changed = false; } - /// Do not limit pretty format output in case of --pager specified. - if (!pager.empty()) + /// Do not limit pretty format output in case of --pager specified or in case of stdout is not a tty. + if (!pager.empty() || !stdout_is_a_tty) { if (!global_context->getSettingsRef().output_format_pretty_max_rows.changed) {
diff --git a/tests/queries/0_stateless/03160_pretty_format_tty.reference b/tests/queries/0_stateless/03160_pretty_format_tty.reference new file mode 100644 index 00000000000..6a5b453966d --- /dev/null +++ b/tests/queries/0_stateless/03160_pretty_format_tty.reference @@ -0,0 +1 @@ +100004
diff --git a/tests/queries/0_stateless/03160_pretty_format_tty.sh b/tests/queries/0_stateless/03160_pretty_format_tty.sh new file mode 100755 index 00000000000..bbc4b96eb90 --- /dev/null +++ b/tests/queries/0_stateless/03160_pretty_format_tty.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# default output_format_pretty_max_rows is 10K +$CLICKHOUSE_LOCAL -q "select * from numbers(100e3) format PrettySpace settings max_threads=1" | wc -l
From 5d848aa32f1127098895cc29ad3200b5b325768a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 18 May 2024 15:27:13 +0200 Subject: [PATCH 425/651] Fix upgrade settings changes check The settings changes check simply verifies that each setting is either the same or has a record in system.settings_changes. However, clickhouse-local now adjusts some settings for the Pretty format when stdout is not a tty, and that is the case for this check. So, to avoid this, just run clickhouse-local under script(1) to fool it.
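For context on the two commits above, the mechanism can be sketched as follows: lift the default pretty-format row limit when output does not go to an interactive terminal (or a pager is in use), but never override a limit the user set explicitly. The names here (`PrettySettings`, `adjustForOutput`) are invented stand-ins for `ClientBase::adjustSettings()`, not the real code; running under script(1) gives the process a pseudo-terminal, so the branch below is not taken.

```cpp
#include <unistd.h> // isatty, STDOUT_FILENO (POSIX)
#include <cstddef>
#include <iostream>

struct PrettySettings
{
    std::size_t max_rows = 10000; // default row limit for pretty formats
    bool changed = false;         // true if the user set it explicitly
};

void adjustForOutput(PrettySettings & s, bool pager_in_use)
{
    const bool stdout_is_a_tty = isatty(STDOUT_FILENO) == 1;
    // Unlimit output going to a pager or a pipe/file, but keep explicit choices.
    if ((pager_in_use || !stdout_is_a_tty) && !s.changed)
        s.max_rows = 0; // 0 means "no limit" in this sketch
}

int main()
{
    PrettySettings s;
    adjustForOutput(s, /*pager_in_use=*/false);
    std::cout << "effective max_rows: " << s.max_rows << '\n';
}
```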
Signed-off-by: Azat Khuzhin --- docker/test/upgrade/run.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 6761ddba3e5..29174cc87e6 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -58,8 +58,14 @@ echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/sys # Install previous release packages install_packages previous_release_package_folder -# Save old settings from system table for settings changes check -clickhouse-local -q "select * from system.settings format Native" > old_settings.native +# NOTE: we need to run clickhouse-local under script to get settings without any adjustments, like clickhouse-local does in case of stdout is not a tty +function save_settings_clean() +{ + local out=$1 && shift + script -q -c "clickhouse-local -q \"select * from system.settings into outfile '$out'\"" --log-out /dev/null +} + +save_settings_clean 'old_settings.native' # Initial run without S3 to create system.*_log on local file system to make it # available for dump via clickhouse-local @@ -183,7 +189,7 @@ configure IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'") if [ "${IS_SANITIZED}" -eq "0" ] then - clickhouse-local -q "select * from system.settings format Native" > new_settings.native + save_settings_clean 'new_settings.native' clickhouse-local -nmq " CREATE TABLE old_settings AS file('old_settings.native'); CREATE TABLE new_settings AS file('new_settings.native'); From 925ed89195008a787eeebd9213ac59f1a8adb17b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 10:31:32 +0200 Subject: [PATCH 426/651] More instrumentation around index --- src/Common/CurrentMetrics.cpp | 6 ++++-- src/Databases/DatabaseOnDisk.cpp | 3 +++ ...ObjectStorageRemoteMetadataRestoreHelper.cpp | 1 + src/Interpreters/AsynchronousInsertQueue.cpp | 3 ++- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 17 ++++++++++++++++- src/Storages/System/StorageSystemReplicas.cpp | 2 ++ 6 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index b9916130bb9..21b4d114d79 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -288,8 +288,10 @@ M(HTTPConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for http hosts") \ \ M(AddressesActive, "Total count of addresses which are used for creation connections with connection pools") \ - M(AddressesBanned, "Total count of addresses which are banned as faulty for creation connections with connection pools") \ - + M(AddressesBanned, "Total count of addresses which are banned as faulty for creation connections with connection pools") \ + \ + M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \ + M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \ #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 5b9723fabc5..161be35f129 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -26,6 +26,8 @@ #include #include #include +#include + namespace fs = std::filesystem; @@ -665,6 +667,7 @@ void 
DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat pool.scheduleOrThrowOnError( [batch, &process_metadata_file, &process_tmp_drop_metadata_file]() mutable { + setThreadName("DatabaseOnDisk"); for (const auto & file : batch) if (file.second) process_metadata_file(file.first); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index 0314e0a7e92..18a0377efe7 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -129,6 +129,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecu { pool.scheduleOrThrowOnError([this, path] { + setThreadName("BackupWorker"); for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) migrateFileToRestorableSchema(it->path()); }); diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index ab29c64184d..d72f3d81549 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -695,7 +695,6 @@ String serializeQuery(const IAST & query, size_t max_length) } -// static void AsynchronousInsertQueue::processData( InsertQuery key, InsertDataPtr data, ContextPtr global_context, QueueShardFlushTimeHistory & queue_shard_flush_time_history) try @@ -705,6 +704,8 @@ try SCOPE_EXIT(CurrentMetrics::sub(CurrentMetrics::PendingAsyncInsert, data->entries.size())); + setThreadName("AsyncInsertQ"); + const auto log = getLogger("AsynchronousInsertQueue"); const auto & insert_query = assert_cast(*key.query); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index de769c59d33..2b1b1b26347 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -53,6 +53,8 @@ namespace CurrentMetrics extern const Metric MergeTreeDataSelectExecutorThreads; extern const Metric MergeTreeDataSelectExecutorThreadsActive; extern const Metric MergeTreeDataSelectExecutorThreadsScheduled; + extern const Metric FilteringMarksWithPrimaryKey; + extern const Metric FilteringMarksWithSecondaryKeys; } namespace DB @@ -664,15 +666,22 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd size_t total_marks_count = part->index_granularity.getMarksCountWithoutFinal(); if (metadata_snapshot->hasPrimaryKey() || part_offset_condition) + { + CurrentMetrics::Increment metric(CurrentMetrics::FilteringMarksWithPrimaryKey); ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, part_offset_condition, settings, log); + } else if (total_marks_count) + { ranges.ranges = MarkRanges{{MarkRange{0, total_marks_count}}}; + } sum_marks_pk.fetch_add(ranges.getMarksCount(), std::memory_order_relaxed); if (!ranges.ranges.empty()) sum_parts_pk.fetch_add(1, std::memory_order_relaxed); + CurrentMetrics::Increment metric(CurrentMetrics::FilteringMarksWithSecondaryKeys); + for (size_t idx = 0; idx < skip_indexes.useful_indices.size(); ++idx) { if (ranges.ranges.empty()) @@ -733,6 +742,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd num_threads = std::min(num_streams, settings.max_threads_for_indexes); } + LOG_TRACE(log, "Filtering marks by primary and secondary keys"); + if (num_threads <= 1) { for (size_t part_index = 0; 
part_index < parts.size(); ++part_index) @@ -740,7 +751,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd } else { - /// Parallel loading of data parts. + /// Parallel loading and filtering of data parts. ThreadPool pool( CurrentMetrics::MergeTreeDataSelectExecutorThreads, CurrentMetrics::MergeTreeDataSelectExecutorThreadsActive, @@ -748,8 +759,11 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd num_threads); for (size_t part_index = 0; part_index < parts.size(); ++part_index) + { pool.scheduleOrThrowOnError([&, part_index, thread_group = CurrentThread::getGroup()] { + setThreadName("MergeTreeIndex"); + SCOPE_EXIT_SAFE( if (thread_group) CurrentThread::detachFromGroupIfNotDetached(); @@ -759,6 +773,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd process_part(part_index); }); + } pool.wait(); } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 5045dec3682..10d5c353c43 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -141,6 +141,8 @@ public: if (thread_group) CurrentThread::attachToGroupIfDetached(thread_group); + setThreadName("SystemReplicas"); + try { ReplicatedTableStatus status; From ad5f6f27dff104f6229819be27fba3732226603e Mon Sep 17 00:00:00 2001 From: copperybean Date: Mon, 20 May 2024 16:28:21 +0800 Subject: [PATCH 427/651] fix reader type, update comment Change-Id: Iefec91bca223eedaabe302b7891808c6d94eed9d --- .../Impl/Parquet/ParquetDataValuesReader.h | 1 + .../Impl/Parquet/ParquetRecordReader.cpp | 29 ++++++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 75adb55df7e..fbccb612b3c 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -80,6 +80,7 @@ public: * the second line shows the corresponding validation state, * then the valid_index_steps has values [1, 3, 2]. * Please note that the the sum of valid_index_steps is same as elements number in this group. 
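Because the valid_index_steps convention documented above is easy to misread, here is a standalone sketch of how such steps could be derived from one group of levels. It follows the comment's own example (where 0 marks a valid element and the steps sum to the group size); it is an illustrative re-implementation under those assumptions, not the reader's actual code.

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Derive valid_index_steps: steps[i] counts elements from the i-th valid
// element (inclusive) to the (i+1)-th valid element (exclusive), with a
// leading entry for the gap before the first valid element.
std::vector<std::size_t> computeValidIndexSteps(const std::vector<int> & levels)
{
    std::vector<std::size_t> steps;
    std::size_t step = 0;
    for (int level : levels)
    {
        if (level == 0) // valid element: close the previous gap
        {
            steps.push_back(step);
            step = 1; // the valid element itself starts the next step
        }
        else
            ++step; // null element: extend the current step
    }
    steps.push_back(step); // tail after the last valid element
    return steps;
}

int main()
{
    // The comment's example: [1, 0, 1, 1, 0, 1] -> null valid null null valid null
    for (std::size_t s : computeValidIndexSteps({1, 0, 1, 1, 0, 1}))
        std::cout << s << ' '; // prints: 1 3 2 (sums to the group size, 6)
    std::cout << '\n';
}
```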
+ * TODO the definition of valid_index_steps should be updated when supporting nested types * * @tparam RepeatedVisitor A callback with signature: void(bool is_valid, UInt32 cursor, UInt32 count) */
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 0b797dd66ad..69da40b47e6 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -27,6 +27,7 @@ namespace DB namespace ErrorCodes { + extern const int NOT_IMPLEMENTED; extern const int PARQUET_EXCEPTION; } @@ -225,7 +226,7 @@ std::unique_ptr ColReaderFactory::fromInt32INT(const parque { switch (int_type.bit_width()) { - case sizeof(Int32): + case 32: { if (int_type.is_signed()) return makeLeafReader(); @@ -241,7 +242,7 @@ std::unique_ptr ColReaderFactory::fromInt64INT(const parque { switch (int_type.bit_width()) { - case sizeof(Int64): + case 64: { if (int_type.is_signed()) return makeLeafReader(); @@ -312,16 +313,28 @@ ParquetRecordReader::ParquetRecordReader( { log = &Poco::Logger::get("ParquetRecordReader"); + std::unordered_map parquet_columns; + auto root = file_reader->metadata()->schema()->group_node(); + for (int i = 0; i < root->field_count(); ++i) + { + auto & node = root->field(i); + parquet_columns.emplace(node->name(), node); + } + parquet_col_indice.reserve(header.columns()); column_readers.reserve(header.columns()); for (const auto & col_with_name : header) { - auto idx = file_reader->metadata()->schema()->ColumnIndex(col_with_name.name); - if (idx < 0) - { - auto msg = PreformattedMessage::create("can not find column with name: {}", col_with_name.name); - throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); - } + auto it = parquet_columns.find(col_with_name.name); + if (it == parquet_columns.end()) + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "no column with '{}' in parquet file", col_with_name.name); + + auto node = it->second; + if (!node->is_primitive()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "arrays and maps are not implemented in native parquet reader"); + + auto idx = file_reader->metadata()->schema()->ColumnIndex(*node); + chassert(idx >= 0); parquet_col_indice.push_back(idx); } if (reader_properties.pre_buffer())
From 314752e044bcdf5d7516d5188e7e6a4302467b90 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 May 2024 11:55:44 +0200 Subject: [PATCH 428/651] Update src/Backups/BackupIO_AzureBlobStorage.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Backups/BackupIO_AzureBlobStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 672a68e089f..5f6495e5733 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -82,7 +82,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) { auto destination_data_source_description = destination_disk->getDataSourceDescription(); - LOG_TRACE(log, "Source description {} desctionation description {}", data_source_description.description, destination_data_source_description.description); + LOG_TRACE(log, "Source description {}, destination description {}", data_source_description.description, destination_data_source_description.description); if
(destination_data_source_description.sameKind(data_source_description) && destination_data_source_description.is_encrypted == encrypted_in_backup) {
From 6a7a09a1f902ec4fb54cd19f9368581cddf6da1e Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 May 2024 11:55:50 +0200 Subject: [PATCH 429/651] Update src/Backups/BackupIO_AzureBlobStorage.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Backups/BackupIO_AzureBlobStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 5f6495e5733..b035125a545 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -150,7 +150,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu { /// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible. auto source_data_source_description = src_disk->getDataSourceDescription(); - LOG_TRACE(log, "Source description {} desctionation description {}", source_data_source_description.description, data_source_description.description); + LOG_TRACE(log, "Source description {}, destination description {}", source_data_source_description.description, data_source_description.description); if (source_data_source_description.sameKind(data_source_description) && source_data_source_description.is_encrypted == copy_encrypted) {
From 23e87ef80ee332b71d5e9a1d51e81de9d4626a84 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 May 2024 11:56:44 +0200 Subject: [PATCH 430/651] Review fix --- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index dc46de1e07f..667e63729ca 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -289,7 +289,7 @@ void copyAzureBlobStorageFile( if (settings->use_native_copy) { - LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); + LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (dest_client->GetClickhouseOptions().IsClientForDisk) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject);
From 4609a330defe4e948e0b978ba7a48575a8a4b43c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 20 May 2024 12:37:07 +0200 Subject: [PATCH 431/651] Update settings changes history --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+)
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 65c8934cb23..d512e3bc3ae 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,6 +93,7 @@ static std::map sett {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, + {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, }},
{"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, From e959ba9578337b30fe6765ab4d9bcf03fb7668a1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 13:38:19 +0300 Subject: [PATCH 432/651] Update docs/en/operations/storing-data.md Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- docs/en/operations/storing-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 59de4989941..e9370c02463 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -434,7 +434,7 @@ In this sample configuration: - the disk is of type `hdfs` (unsupported) - the data is hosted at `hdfs://hdfs1:9000/clickhouse/` -By the way, HDFS is unsupported and won't work. +By the way, HDFS is unsupported and therefore there might be issues when using it. Feel free to make a pull request with the fix if any issue arises. ```xml From 7d3a601eccef2094f60b61cf295a6f88d9fbd0f8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 13:39:07 +0300 Subject: [PATCH 433/651] Update docs/en/operations/storing-data.md --- docs/en/operations/storing-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index e9370c02463..9b316960750 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -466,7 +466,7 @@ By the way, HDFS is unsupported and therefore there might be issues when using i ``` -Keep in mind that HDFS does not work at all. +Keep in mind that HDFS may not work in corner cases. ### Using Data Encryption {#encrypted-virtual-file-system} From d32c059e7f83830cc8654ddc24e55008363df113 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 20 May 2024 13:48:14 +0200 Subject: [PATCH 434/651] Move changes from private --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 8 ++++++-- src/Storages/MergeTree/IMergeTreeDataPart.h | 13 +++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 463ca07ec57..4a34f828801 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -793,7 +793,8 @@ void IMergeTreeDataPart::addProjectionPart( projection_parts[projection_name] = std::move(projection_part); } -void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded) +void IMergeTreeDataPart::loadProjections( + bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded, bool only_metadata) { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); for (const auto & projection : metadata_snapshot->projections) @@ -813,7 +814,10 @@ void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool ch try { - part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); + if (only_metadata) + part->loadChecksums(require_columns_checksums); + else + part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); } catch (...) 
{ diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c380f99060e..9ee01c0efc4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -445,7 +445,15 @@ public: bool hasBrokenProjection(const String & projection_name) const; /// Return true, if all projections were loaded successfully and none was marked as broken. - void loadProjections(bool require_columns_checksums, bool check_consistency, bool & has_broken_projection, bool if_not_loaded = false); + void loadProjections( + bool require_columns_checksums, + bool check_consistency, + bool & has_broken_projection, + bool if_not_loaded = false, + bool only_metadata = false); + + /// If checksums.txt exists, reads file's checksums (and sizes) from it + void loadChecksums(bool require); void setBrokenReason(const String & message, int code) const; @@ -671,9 +679,6 @@ private: static void appendFilesOfColumns(Strings & files); - /// If checksums.txt exists, reads file's checksums (and sizes) from it - void loadChecksums(bool require); - static void appendFilesOfChecksums(Strings & files); /// Loads marks index granularity into memory From 6f56642567cea3867a57f1f4c96c4c3a35be1ed7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 20 May 2024 13:13:50 +0000 Subject: [PATCH 435/651] Incorporate review feedback --- base/base/cgroupsv2.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index 5c5ab150564..f20b9daf22e 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -17,9 +17,9 @@ bool cgroupsV2Enabled() return false; return true; } - catch (...) + catch (const std::filesystem::filesystem_error &) /// all "underlying OS API errors", typically: permission denied { - return false; /// e.g. 
permission denied exception + return false; /// not logging the exception as most callers fall back to cgroups v1 } #else return false;
From c4f47b907182e58c12dcf7934e4745ad8048788d Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 20 May 2024 14:09:17 +0000 Subject: [PATCH 436/651] set green mergeable status in upstream pr from finish check in sync --- tests/ci/commit_status_helper.py | 2 ++ tests/ci/finish_check.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 733b07813a5..34e2d9f8085 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -522,6 +522,7 @@ def update_upstream_sync_status( sync_pr_number: int, gh: Github, state: StatusType, + can_set_green_mergeable_status: bool = False, ) -> None: upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) upstream_pr = upstream_repo.get_pull(upstream_pr_number) @@ -571,4 +572,5 @@ def update_upstream_sync_status( upstream_commit, get_commit_filtered_statuses(upstream_commit), True, + set_if_green=can_set_green_mergeable_status, )
diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index b31be7654d3..1a7000f5353 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -51,7 +51,13 @@ def main(): and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY ): upstream_pr_number = int(pr_info.head_ref.split("/pr/", maxsplit=1)[1]) - update_upstream_sync_status(upstream_pr_number, pr_info.number, gh, state) + update_upstream_sync_status( + upstream_pr_number, + pr_info.number, + gh, + state, + can_set_green_mergeable_status=True, + ) statuses = [s for s in statuses if s.context == StatusNames.CI] if not statuses:
From fb619bac2f664fc0fa337fae3174332d397ce7f3 Mon Sep 17 00:00:00 2001 From: Maciej Bak Date: Mon, 20 May 2024 16:35:06 +0200 Subject: [PATCH 437/651] Fix backup all documentation --- docs/en/operations/backup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 2ba50b39934..46c24ad8491 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -22,7 +22,7 @@ description: In order to effectively mitigate possible human errors, you should TEMPORARY TABLE table_name [AS table_name_in_backup] | VIEW view_name [AS view_name_in_backup] ALL TEMPORARY TABLES [EXCEPT ...] | - ALL DATABASES [EXCEPT ...] } [,...] + ALL [EXCEPT ...] } [,...] [ON CLUSTER 'cluster_name'] TO|FROM File('/') | Disk('', '/') | S3('/', '', '') [SETTINGS base_backup = File('/') | Disk(...) | S3('/', '', '')]
From cb912a656e5245203b85d47fe6f6b0674319fd02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 17:07:35 +0200 Subject: [PATCH 438/651] Loosen build resource limits for unusual architectures --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt index abbc48ab23a..afea22a1b90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,8 +61,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS) # set CPU time limit to 1000 seconds set (RLIMIT_CPU 1000) - # Sanitizers are too heavy - if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE) + # Sanitizers are too heavy. Some architectures too.
+ if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE OR ARCH_RISCV64 OR ARCH_LOONGARCH64) set (RLIMIT_DATA 10000000000) # 10G endif() # For some files currently building RISCV64 might be too slow. TODO: Improve compilation times per file
From 85998672f01ab17f3dff9d8f430d28204d4de28c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 13 May 2024 10:21:03 +0200 Subject: [PATCH 439/651] tests: attempt to fix 02340_parts_refcnt_mergetree flakiness I'm not sure that this was the case, but I don't see any other reasons for now. I've also looked at the logs and trace_log, and did not notice anything interesting. Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index 208a9038681..0a96cc0e774 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -9,7 +9,11 @@ function check_refcnt_for_table() { local table=$1 && shift - $CLICKHOUSE_CLIENT -q "system stop merges $table" + $CLICKHOUSE_CLIENT -nm -q " + system stop merges $table; + -- cleanup thread may hold the parts lock + system stop cleanup $table; + " $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)" local query_id
From 0832f961070ea8e2ffd294a577036f8000ec49bf Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 May 2024 14:30:25 +0200 Subject: [PATCH 440/651] tests: 02340_parts_refcnt_mergetree cleanup Signed-off-by: Azat Khuzhin --- .../0_stateless/02340_parts_refcnt_mergetree.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index 0a96cc0e774..2d4e306cb3c 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -19,13 +19,19 @@ function check_refcnt_for_table() local query_id query_id="$table-$(random_str 10)" - SETTINGS="--format Null --max_threads 1 --max_block_size 1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0" + local args=( + --format Null + --max_threads 1 + --max_block_size 1 + --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0 + --query_id "$query_id" + ) # Notes: # - query may sleep 1*(200/4)=50 seconds maximum, it is enough to check system.parts # - "part = 1" condition should prune all parts except first # - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier - $CLICKHOUSE_CLIENT $SETTINGS --query_id "$query_id" -q "select sleepEachRow(1) from $table where part = 1" & + $CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(1) from $table where part = 1" & PID=$!
# wait for query to be started From 722b7ab5229c6e0267b6941363dd446220f337d0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 May 2024 14:43:42 +0200 Subject: [PATCH 441/651] tests: improve 02340_parts_refcnt_mergetree (from 12sec to 1sec) Signed-off-by: Azat Khuzhin --- .../02340_parts_refcnt_mergetree.sh | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index 2d4e306cb3c..5fae571e217 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -19,33 +19,33 @@ function check_refcnt_for_table() local query_id query_id="$table-$(random_str 10)" + local log_file + log_file=$(mktemp "$CUR_DIR/clickhouse-tests.XXXXXX.log") local args=( --format Null --max_threads 1 --max_block_size 1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0 --query_id "$query_id" + --send_logs_level "test" + --server_logs_file "$log_file" ) # Notes: - # - query may sleep 1*(200/4)=50 seconds maximum, it is enough to check system.parts + # - query may sleep 0.1*(200/4)=5 seconds maximum, it is enough to check system.parts # - "part = 1" condition should prune all parts except first # - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier - $CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(1) from $table where part = 1" & + $CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(0.1) from $table where part = 1" & PID=$! - # wait for query to be started - while [ "$($CLICKHOUSE_CLIENT -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do - sleep 0.1 - done - # When the query only starts it execution it holds reference for each part, # however when it starts reading, partition pruning takes place, # and it should hold only parts that are required for SELECT # - # But to reach partition prune the function sleepEachRow() will be executed twice, - # so 2 seconds for sleepEachRow() and 3 seconds just to ensure that it enters the reading stage. - sleep $((2+3)) + # So let's wait while the reading will be started. + while ! grep -F -q -e "Exception" -e "MergeTreeRangeReader" "$log_file"; do + sleep 0.1 + done # NOTE: parts that are used in query will have refcount increased for each range $CLICKHOUSE_CLIENT -q "select table, name, refcount from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount > 1" @@ -53,6 +53,8 @@ function check_refcnt_for_table() # Kill the query gracefully. 
kill -INT $PID wait $PID + grep -F Exception "$log_file" | grep -v -F QUERY_WAS_CANCELLED + rm -f "${log_file:?}" } # NOTE: index_granularity=1 to cancel ASAP
From a74fa022797678a34de30c891be74caeb5e2802a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 May 2024 14:52:04 +0200 Subject: [PATCH 442/651] tests: reduce probability of extra parts holders in 02340_parts_refcnt_mergetree Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index 5fae571e217..d7e1a562ff4 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -13,6 +13,8 @@ function check_refcnt_for_table() system stop merges $table; -- cleanup thread may hold the parts lock system stop cleanup $table; + -- queue may hold the parts lock for a while as well + system stop pulling replication log $table; " $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)" @@ -64,11 +66,13 @@ $CLICKHOUSE_CLIENT -nmq " create table data_02340 (key Int, part Int) engine=MergeTree() partition by part order by key settings index_granularity=1; " || exit 1 check_refcnt_for_table data_02340 +$CLICKHOUSE_CLIENT -q "drop table data_02340 sync" $CLICKHOUSE_CLIENT -nmq " drop table if exists data_02340_rep sync; create table data_02340_rep (key Int, part Int) engine=ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') partition by part order by key settings index_granularity=1; " || exit 1 check_refcnt_for_table data_02340_rep +$CLICKHOUSE_CLIENT -q "drop table data_02340_rep sync" exit 0
From 599470241c7072fd9589102dca635f7c7d063ab7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 20 May 2024 18:01:02 +0200 Subject: [PATCH 443/651] tests: reduce flakiness of 02340_parts_refcnt_mergetree Signed-off-by: Azat Khuzhin --- .../0_stateless/02340_parts_refcnt_mergetree.reference | 4 ++-- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference index ae4fafae829..b06fee4af06 100644 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference @@ -1,2 +1,2 @@ -data_02340 1_2_2_0 6 -data_02340_rep 1_0_0_0 6 +data_02340 1_2_2_0 1 +data_02340_rep 1_0_0_0 1
diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index d7e1a562ff4..caa600298ce 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -49,8 +49,12 @@ function check_refcnt_for_table() sleep 0.1 done - # NOTE: parts that are used in query will have refcount increased for each range - $CLICKHOUSE_CLIENT -q "select table, name, refcount from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount > 1" + # NOTE: parts that are used in query will be held in multiple places, and + # this is where magic 6 came from. Also there could be some other + # background threads (i.e.
asynchronous metrics) that uses the part, so we + # simply filter parts not by "refcount > 1" but with some delta - "3", to + # avoid flakiness. + $CLICKHOUSE_CLIENT -q "select table, name, refcount>=6 from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount >= 3" # Kill the query gracefully. kill -INT $PID From d6fadfc9a6d1ec3e064dc55ae96a13463bb49676 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 May 2024 18:18:18 +0200 Subject: [PATCH 444/651] Azure can do native copy between containers --- src/Backups/BackupIO_AzureBlobStorage.cpp | 8 ++++---- src/Disks/DiskType.cpp | 12 ++++++++++-- .../AzureBlobStorage/AzureObjectStorage.h | 3 ++- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 2 +- src/Storages/StorageAzureBlob.cpp | 8 +------- src/Storages/StorageAzureBlob.h | 2 -- src/TableFunctions/TableFunctionAzureBlobStorage.cpp | 4 ++-- .../TableFunctionAzureBlobStorageCluster.cpp | 4 ++-- .../test_azure_blob_storage_native_copy/test.py | 12 ++++++++---- 9 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index b035125a545..331cace67d7 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -36,7 +36,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURLWithContainer(), false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); @@ -47,7 +47,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( std::move(client_ptr), StorageAzureBlob::createSettings(context_), configuration.container, - configuration.getConnectionURLWithContainer()); + configuration.getConnectionURL().toString()); client = object_storage->getAzureBlobStorageClient(); auto settings_copy = *object_storage->getSettings(); @@ -128,7 +128,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURLWithContainer(), false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container); @@ -138,7 +138,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( std::move(client_ptr), StorageAzureBlob::createSettings(context_), configuration_.container, - configuration.getConnectionURLWithContainer()); + configuration_.getConnectionURL().toString()); client = object_storage->getAzureBlobStorageClient(); auto settings_copy = *object_storage->getSettings(); settings_copy.use_native_copy = 
allow_azure_native_copy; diff --git a/src/Disks/DiskType.cpp b/src/Disks/DiskType.cpp index 448e173a30f..07a7099419b 100644 --- a/src/Disks/DiskType.cpp +++ b/src/Disks/DiskType.cpp @@ -32,8 +32,16 @@ bool DataSourceDescription::operator==(const DataSourceDescription & other) cons bool DataSourceDescription::sameKind(const DataSourceDescription & other) const { - return std::tie(type, object_storage_type, description) - == std::tie(other.type, other.object_storage_type, other.description); + std::string_view our_description = description; + if (our_description.ends_with('/') && our_description.length() > 1) + our_description = our_description.substr(0, our_description.length() - 1); + + std::string_view other_description = other.description; + if (other_description.ends_with('/') && other_description.length() > 1) + other_description = other_description.substr(0, other_description.length() - 1); + + return std::tie(type, object_storage_type, our_description) + == std::tie(other.type, other.object_storage_type, other_description); } std::string DataSourceDescription::toString() const diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 3d94090bcc6..2619c1a1e83 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -173,7 +173,8 @@ private: MultiVersion client; MultiVersion settings; const String object_namespace; /// container + prefix - const String description; /// url + container + + const String description; /// source url without container and prefix LoggerPtr log; }; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index cddcea979b5..cc53054c775 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -313,7 +313,7 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix, - endpoint.prefix.empty() ? 
endpoint_string : endpoint_string.substr(0, endpoint_string.length() - endpoint.prefix.length())); + endpoint.getEndpointWithoutContainer()); }; factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 0103fc0d2a2..e1c6ec0097c 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -303,7 +303,7 @@ void registerStorageAzureBlob(StorageFactory & factory) return std::make_shared( configuration, - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()), + std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()), args.getContext(), args.table_id, args.columns, @@ -491,12 +491,6 @@ Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); } -std::string StorageAzureBlob::Configuration::getConnectionURLWithContainer() const -{ - auto url = getConnectionURL(); - return fs::path(url.toString()) / container; -} - bool StorageAzureBlob::Configuration::withGlobsIgnorePartitionWildcard() const { if (!withPartitionWildcard()) diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 7bce40bce26..b433cd92d68 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -45,8 +45,6 @@ public: Poco::URI getConnectionURL() const; - std::string getConnectionURLWithContainer() const; - std::string connection_url; bool is_connection_string; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index e73277b4d7b..7a17db2a1a8 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -333,7 +333,7 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()); + auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()); if (configuration.format == "auto") return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); @@ -365,7 +365,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct StoragePtr storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()), context, StorageID(getDatabaseName(), table_name), columns, diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index 
dc65426a6e3..02b24dccf86 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -39,7 +39,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( /// On worker node this filename won't contains globs storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()), context, StorageID(getDatabaseName(), table_name), columns, @@ -54,7 +54,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( storage = std::make_shared( cluster_name, configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURL().toString()), StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, diff --git a/tests/integration/test_azure_blob_storage_native_copy/test.py b/tests/integration/test_azure_blob_storage_native_copy/test.py index b16d9b4b5c4..a47688d0713 100644 --- a/tests/integration/test_azure_blob_storage_native_copy/test.py +++ b/tests/integration/test_azure_blob_storage_native_copy/test.py @@ -33,7 +33,7 @@ def generate_config(port): local object_storage azure_blob_storage - http://azurite1:{port}/devstoreaccount1 + http://azurite1:{port}/devstoreaccount1/ cont false devstoreaccount1 @@ -45,7 +45,7 @@ def generate_config(port): object_storage azure_blob_storage true - http://azurite1:{port}/devstoreaccount1 + http://azurite1:{port}/devstoreaccount1/ othercontainer false devstoreaccount1 @@ -175,10 +175,14 @@ def test_backup_restore_on_merge_tree_same_container(cluster): assert ( azure_query(node1, f"SELECT * from test_simple_merge_tree_restored") == "1\ta\n" ) + + assert node1.contains_in_log("using native copy") + azure_query(node1, f"DROP TABLE test_simple_merge_tree") azure_query(node1, f"DROP TABLE test_simple_merge_tree_restored") + def test_backup_restore_on_merge_tree_different_container(cluster): node2 = cluster.instances["node2"] azure_query( @@ -196,7 +200,7 @@ def test_backup_restore_on_merge_tree_different_container(cluster): f"BACKUP TABLE test_simple_merge_tree_different_bucket TO {backup_destination}", ) - assert not node2.contains_in_log("using native copy") + assert node2.contains_in_log("using native copy") azure_query( node2, @@ -209,7 +213,7 @@ def test_backup_restore_on_merge_tree_different_container(cluster): == "1\ta\n" ) - assert not node2.contains_in_log("using native copy") + assert node2.contains_in_log("using native copy") azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket") azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket_restored") From 577bdf6b17d83a235c1ecee719fa5d157ee6a13d Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 20 May 2024 18:27:28 +0200 Subject: [PATCH 445/651] Ping CI From 235b0f2d5b2f3a3fc424be7aa5349e379a27dbd1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 20 May 2024 16:28:46 +0000 Subject: [PATCH 446/651] Automatic style fix --- tests/integration/test_azure_blob_storage_native_copy/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_azure_blob_storage_native_copy/test.py 
b/tests/integration/test_azure_blob_storage_native_copy/test.py index a47688d0713..4f543e4c8b2 100644 --- a/tests/integration/test_azure_blob_storage_native_copy/test.py +++ b/tests/integration/test_azure_blob_storage_native_copy/test.py @@ -182,7 +182,6 @@ def test_backup_restore_on_merge_tree_same_container(cluster): azure_query(node1, f"DROP TABLE test_simple_merge_tree_restored") - def test_backup_restore_on_merge_tree_different_container(cluster): node2 = cluster.instances["node2"] azure_query( From dcb2ed8b9b16db16841dcd52513a2eebd8ad3223 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 20 May 2024 18:34:47 +0200 Subject: [PATCH 447/651] Update src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h --- src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 2619c1a1e83..f12ebb68dbb 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -174,7 +174,8 @@ private: MultiVersion settings; const String object_namespace; /// container + prefix - const String description; /// source url without container and prefix + /// We use source url without container and prefix as description, because in Azure there are no limitations for operations between different containers. + const String description; LoggerPtr log; }; From b43fd17fc5434ec73f9b6c421b432f16aedc862f Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 20 May 2024 16:57:44 +0000 Subject: [PATCH 448/651] Review changes --- src/Disks/FakeDiskTransaction.h | 2 +- src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp | 5 ++++- src/Disks/ObjectStorages/IMetadataStorage.h | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Disks/FakeDiskTransaction.h b/src/Disks/FakeDiskTransaction.h index 65a42481e70..69f08de2517 100644 --- a/src/Disks/FakeDiskTransaction.h +++ b/src/Disks/FakeDiskTransaction.h @@ -142,7 +142,7 @@ public: void truncateFile(const std::string & /* src_path */, size_t /* target_size */) override { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Operation is not implemented"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Operation `truncateFile` is not implemented"); } private: diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 605487e8cba..e7c85bea1c6 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -588,7 +588,7 @@ struct TruncateFileObjectStorageOperation final : public IDiskObjectStorageOpera if (!metadata_storage.isFile(path)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not a file", path); - truncate_outcome = tx->truncateFile(path,size); + truncate_outcome = tx->truncateFile(path, size); } } @@ -599,6 +599,9 @@ struct TruncateFileObjectStorageOperation final : public IDiskObjectStorageOpera void finalize() override { + if (!truncate_outcome) + return; + if (!truncate_outcome->objects_to_remove.empty()) object_storage.removeObjectsIfExist(truncate_outcome->objects_to_remove); } diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index 06ece53a32f..bed24849ed6 100644 --- 
a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -154,7 +154,6 @@ public: virtual TruncateFileOperationOutcomePtr truncateFile(const std::string & /* path */, size_t /* size */) { throwNotImplemented(); - return nullptr; } virtual ~IMetadataTransaction() = default; From ede7b1474900d0dc7e71fce6dd5e3e7ed1781a00 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 19:43:39 +0200 Subject: [PATCH 449/651] More of that --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index afea22a1b90..2d51c1b242f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,7 +63,9 @@ if (ENABLE_CHECK_HEAVY_BUILDS) # Sanitizers are too heavy. Some architectures too. if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE OR ARCH_RISCV64 OR ARCH_LOONGARCH64) - set (RLIMIT_DATA 10000000000) # 10G + # Twice as large + set (RLIMIT_DATA 10000000000) + set (RLIMIT_AS 20000000000) endif() # For some files currently building RISCV64 might be too slow. TODO: Improve compilation times per file From a735ab7dd1f46e4e6fbcd529efafaada5b70afa5 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 20 May 2024 18:58:56 +0000 Subject: [PATCH 450/651] fix upstream commit status update in sync pr --- .github/workflows/pull_request.yml | 2 +- tests/ci/commit_status_helper.py | 36 +++++++++++++----------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 21c2e48677d..f20e987db97 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -178,7 +178,7 @@ jobs: # FinishCheck: if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3, CheckReadyForMerge] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 34e2d9f8085..bbda97b9084 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -530,47 +530,41 @@ def update_upstream_sync_status( sync_pr = sync_repo.get_pull(sync_pr_number) # Find the commit that is in both repos, upstream and cloud sync_commits = sync_pr.get_commits().reversed - upstream_commits = upstream_pr.get_commits() + upstream_commits = upstream_pr.get_commits().reversed # Github objects are compared by _url attribute. 
We can't compare them directly and # should compare commits by SHA1 - upstream_shas = [uc.sha for uc in upstream_commits] + upstream_shas = [c.sha for c in upstream_commits] logging.info("Commits in upstream PR:\n %s", ", ".join(upstream_shas)) - sync_shas = [uc.sha for uc in upstream_commits] + sync_shas = [c.sha for c in sync_commits] logging.info("Commits in sync PR:\n %s", ", ".join(reversed(sync_shas))) - found = False - for commit in sync_commits: - try: - idx = upstream_shas.index(commit.sha) - found = True - upstream_commit = upstream_commits[idx] - break - except ValueError: - continue - if not found: - logging.info( - "There's no same commits in upstream and sync PRs, probably force-push" - ) - return + # find latest synced commit + last_synced_upstream_commit = None + for commit in upstream_commits: + if commit.sha in sync_shas: + last_synced_upstream_commit = commit + break + + assert last_synced_upstream_commit sync_status = get_status(state) logging.info( "Using commit %s to post the %s status `%s`: [%s]", - upstream_commit.sha, + last_synced_upstream_commit.sha, sync_status, StatusNames.SYNC, "", ) post_commit_status( - upstream_commit, + last_synced_upstream_commit, sync_status, "", # let's won't expose any urls from cloud "", StatusNames.SYNC, ) trigger_mergeable_check( - upstream_commit, - get_commit_filtered_statuses(upstream_commit), + last_synced_upstream_commit, + get_commit_filtered_statuses(last_synced_upstream_commit), True, set_if_green=can_set_green_mergeable_status, ) From 84459052b6cddd9a5e1ca4bcd00e5edfc6e49f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 20 May 2024 21:27:24 +0200 Subject: [PATCH 451/651] Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView --- src/Interpreters/InterpreterCreateQuery.cpp | 7 +++++++ .../0_stateless/03161_create_table_as_mv.reference | 0 .../0_stateless/03161_create_table_as_mv.sql | 14 ++++++++++++++ 3 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/03161_create_table_as_mv.reference create mode 100644 tests/queries/0_stateless/03161_create_table_as_mv.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 519cbde588f..711693f71b1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -977,6 +977,13 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_ordinary_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); + if (as_create.is_materialized_view && as_create.to_table_id) + throw Exception( + ErrorCodes::INCORRECT_QUERY, + "Cannot CREATE a table AS {}, it is a Materialized View without storage. 
Use \"AS `{}`\" instead", + qualified_name, + as_create.to_table_id.getQualifiedName()); + if (as_create.is_live_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); diff --git a/tests/queries/0_stateless/03161_create_table_as_mv.reference b/tests/queries/0_stateless/03161_create_table_as_mv.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03161_create_table_as_mv.sql b/tests/queries/0_stateless/03161_create_table_as_mv.sql new file mode 100644 index 00000000000..e80659ac923 --- /dev/null +++ b/tests/queries/0_stateless/03161_create_table_as_mv.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS base_table; +DROP TABLE IF EXISTS target_table; +DROP TABLE IF EXISTS mv_from_base_to_target; +DROP TABLE IF EXISTS mv_with_storage; +DROP TABLE IF EXISTS other_table_1; +DROP TABLE IF EXISTS other_table_2; + +CREATE TABLE base_table (date DateTime, id String, cost Float64) ENGINE = MergeTree() ORDER BY date; +CREATE TABLE target_table (id String, total AggregateFunction(sum, Float64)) ENGINE = MergeTree() ORDER BY id; +CREATE MATERIALIZED VIEW mv_from_base_to_target TO target_table AS Select id, sumState(cost) FROM base_table GROUP BY id; +CREATE MATERIALIZED VIEW mv_with_storage ENGINE=MergeTree() ORDER BY id AS Select id, sumState(cost) FROM base_table GROUP BY id; + +CREATE TABLE other_table_1 AS mv_with_storage; +CREATE TABLE other_table_2 AS mv_from_base_to_target; -- { serverError INCORRECT_QUERY } From 6c6cdaccd13e32d51fcb41100ca82d6c7ce0c37d Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Sat, 11 May 2024 00:51:52 +0000 Subject: [PATCH 452/651] Fix filter pushdown for Parquet and maybe StorageMerge --- .../QueryPlan/ReadFromMergeTree.cpp | 20 +------------------ .../QueryPlan/ReadFromPreparedSource.cpp | 1 - .../QueryPlan/SourceStepWithFilter.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 3 ++- src/Storages/IStorageCluster.cpp | 3 ++- .../RocksDB/StorageEmbeddedRocksDB.cpp | 3 ++- src/Storages/S3Queue/StorageS3Queue.cpp | 3 ++- src/Storages/SelectQueryInfo.cpp | 20 +++++++++++++++++++ src/Storages/SelectQueryInfo.h | 6 ++++++ src/Storages/StorageAzureBlob.cpp | 3 ++- src/Storages/StorageFile.cpp | 3 ++- src/Storages/StorageMerge.cpp | 2 +- src/Storages/StorageMergeTreeIndex.cpp | 3 ++- src/Storages/StorageS3.cpp | 3 ++- src/Storages/StorageURL.cpp | 3 ++- .../System/IStorageSystemOneBlock.cpp | 3 ++- src/Storages/System/StorageSystemColumns.cpp | 3 ++- .../StorageSystemDataSkippingIndices.cpp | 3 ++- .../System/StorageSystemDetachedParts.cpp | 3 ++- .../System/StorageSystemPartsBase.cpp | 3 ++- src/Storages/System/StorageSystemReplicas.cpp | 3 ++- src/Storages/System/StorageSystemTables.cpp | 3 ++- .../System/StorageSystemZooKeeper.cpp | 3 ++- .../02841_parquet_filter_pushdown.sql | 4 ---- 24 files changed, 63 insertions(+), 43 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e523a2c243c..6f0fa55c349 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1534,25 +1534,7 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) { if (!indexes) { - /// Analyzer generates unique ColumnIdentifiers like __table1.__partition_id in filter nodes, - /// while key analysis still requires unqualified column names. 
- std::unordered_map node_name_to_input_node_column; - if (query_info.planner_context) - { - const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); - for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) - { - /// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG, - /// so they should not be added to the input nodes. - if (alias_column_expressions.contains(column_name)) - continue; - const auto & column = table_expression_data.getColumnOrThrow(column_name); - node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); - } - } - - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, node_name_to_input_node_column); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, query_info.buildNodeNameToInputNodeColumn()); /// NOTE: Currently we store two DAGs for analysis: /// (1) SourceStepWithFilter::filter_nodes, (2) query_info.filter_actions_dag. Make sure there are consistent. diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index b845101125b..92c936cdc20 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,6 +1,5 @@ #include #include -#include #include namespace DB diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index ce5a59a92f9..ad0940b90b9 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -80,7 +80,7 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo void SourceStepWithFilter::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes, query_info.buildNodeNameToInputNodeColumn()); } void SourceStepWithFilter::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 0f3b03f0955..33bde34b4f9 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -994,7 +994,8 @@ private: void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index ab45ce877c2..9c5b29ae265 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -86,7 +86,8 @@ private: void ReadFromCluster::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp 
b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 01417b8977b..1a9aa6d0f41 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -578,7 +578,8 @@ void ReadFromEmbeddedRocksDB::initializePipeline(QueryPipelineBuilder & pipeline void ReadFromEmbeddedRocksDB::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const auto & sample_block = getOutputStream().header; auto primary_key_data_type = sample_block.getByName(storage.primary_key).type; std::tie(keys, all_scan) = getFilterKeys(storage.primary_key, primary_key_data_type, filter_actions_dag, context); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index c3a772e532c..16e42e32b8a 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -287,7 +287,8 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/SelectQueryInfo.cpp b/src/Storages/SelectQueryInfo.cpp index 665da7fee70..d59ccf0dfaf 100644 --- a/src/Storages/SelectQueryInfo.cpp +++ b/src/Storages/SelectQueryInfo.cpp @@ -13,4 +13,24 @@ bool SelectQueryInfo::isFinal() const return select.final(); } +std::unordered_map SelectQueryInfo::buildNodeNameToInputNodeColumn() const +{ + std::unordered_map node_name_to_input_node_column; + if (planner_context) + { + const auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) + { + /// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG, + /// so they should not be added to the input nodes. + if (alias_column_expressions.contains(column_name)) + continue; + const auto & column = table_expression_data.getColumnOrThrow(column_name); + node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); + } + } + return node_name_to_input_node_column; +} + } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 655676812d9..11e2a2fc5e7 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -239,5 +239,11 @@ struct SelectQueryInfo bool merge_tree_enable_remove_parts_from_snapshot_optimization = true; bool isFinal() const; + + /// Analyzer generates unique ColumnIdentifiers like __table1.__partition_id in filter nodes, + /// while key analysis still requires unqualified column names. + /// This function generates a map that maps the unique names to table column names, + /// for the current table (`table_expression`). 
+ std::unordered_map buildNodeNameToInputNodeColumn() const; }; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index e1c6ec0097c..e98eaf1e8f2 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -799,7 +799,8 @@ private: void ReadFromAzureBlob::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7db8fc2500a..51bcc64bceb 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1534,7 +1534,8 @@ private: void ReadFromFile::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 7afa480149f..f244ca0be05 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1622,7 +1622,7 @@ void ReadFromMerge::applyFilters(const QueryPlan & plan, const ActionDAGNodes & void ReadFromMerge::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); filterTablesAndCreateChildrenPlans(); diff --git a/src/Storages/StorageMergeTreeIndex.cpp b/src/Storages/StorageMergeTreeIndex.cpp index 4747232d7f7..0b1ad02f8c9 100644 --- a/src/Storages/StorageMergeTreeIndex.cpp +++ b/src/Storages/StorageMergeTreeIndex.cpp @@ -280,7 +280,8 @@ private: void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 9768653f3fe..2ce188c203c 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1454,7 +1454,8 @@ void StorageS3::read( void ReadFromStorageS3Step::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8a71a771367..272f771194d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1038,7 +1038,8 @@ private: void ReadFromURL::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp index 
53399654c8d..456b7c4f90b 100644 --- a/src/Storages/System/IStorageSystemOneBlock.cpp +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -91,7 +91,8 @@ void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 74b44cc0a2d..49da1eba9ec 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -342,7 +342,8 @@ private: void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2afc03d0e5e..093adc59cc6 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -219,7 +219,8 @@ private: void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 31d566ef8b6..f48a8c67971 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -313,7 +313,8 @@ protected: void ReadFromSystemDetachedParts::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) { const auto * predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index b1ea2dd3f2b..175c0834bcb 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -263,7 +263,8 @@ ReadFromSystemPartsBase::ReadFromSystemPartsBase( void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) { const auto * predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 10d5c353c43..3bd5fd290db 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -290,7 +290,8 @@ private: void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + 
SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index d428d6bd6d0..1e6c7a0c9ca 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -750,7 +750,8 @@ void StorageSystemTables::read( void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 7afa1894a64..eccddbd5d88 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -474,7 +474,8 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont void ReadFromSystemZooKeeper::applyFilters(ActionDAGNodes added_filter_nodes) { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + paths = extractPath(added_filter_nodes.nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); } diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql index 8521ada04d5..950485d53f0 100644 --- a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql @@ -8,10 +8,6 @@ set optimize_or_like_chain = 0; set max_block_size = 100000; set max_insert_threads = 1; --- Analyzer breaks the queries with IN and some queries with BETWEEN. --- TODO: Figure out why. -set allow_experimental_analyzer=0; - -- Try all the types. insert into function file('02841.parquet') -- Use negative numbers to test sign extension for signed types and lack of sign extension for From 4ed944285de824d55458aec635bdd45671dbda39 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Sat, 11 May 2024 01:19:09 +0000 Subject: [PATCH 453/651] Enable ORC test too --- tests/queries/0_stateless/02892_orc_filter_pushdown.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql index d319252f592..f9aa7696ac6 100644 --- a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql @@ -13,9 +13,6 @@ set max_insert_threads = 1; SET session_timezone = 'UTC'; --- Analyzer breaks the queries with IN and some queries with BETWEEN. -set allow_experimental_analyzer=0; - -- Try all the types. 
insert into function file('02892.orc') From 2e83043d7b0220f08e8bd03d46710d22fe072d57 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 21 May 2024 00:34:54 +0000 Subject: [PATCH 454/651] fix --- src/Storages/StorageMerge.cpp | 2 +- src/Storages/System/StorageSystemZooKeeper.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f244ca0be05..4c678a1228b 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1622,7 +1622,7 @@ void ReadFromMerge::applyFilters(const QueryPlan & plan, const ActionDAGNodes & void ReadFromMerge::applyFilters(ActionDAGNodes added_filter_nodes) { - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + SourceStepWithFilter::applyFilters(added_filter_nodes); filterTablesAndCreateChildrenPlans(); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index eccddbd5d88..cb46cd19517 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -474,7 +474,7 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont void ReadFromSystemZooKeeper::applyFilters(ActionDAGNodes added_filter_nodes) { - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + SourceStepWithFilter::applyFilters(added_filter_nodes); paths = extractPath(added_filter_nodes.nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); } From a66eab2da14bce638ce8ef6abb51ae5e0125c49d Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 21 May 2024 10:13:47 +0200 Subject: [PATCH 455/651] fix clang tidy --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index cc53054c775..c83b9247b99 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -306,7 +306,6 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) bool /* skip_access_check */) -> ObjectStoragePtr { AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - std::string endpoint_string = endpoint.getEndpoint(); return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, From b253ca36084ec50e8d06dfe50cb3561cd915a602 Mon Sep 17 00:00:00 2001 From: copperybean Date: Mon, 20 May 2024 23:12:07 +0800 Subject: [PATCH 456/651] fix clang-tidy warnings Change-Id: Iff9f5f894e815b184ac35f61b4cac87908c612b5 --- contrib/arrow | 2 +- src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/arrow b/contrib/arrow index 8f36d71d185..5cfccd8ea65 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb +Subproject commit 5cfccd8ea65f33d4517e7409815d761c7650b45d diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 69da40b47e6..a7e51f88b3c 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -314,10 +314,10 @@ ParquetRecordReader::ParquetRecordReader( log = &Poco::Logger::get("ParquetRecordReader"); std::unordered_map parquet_columns; - auto root = 
file_reader->metadata()->schema()->group_node();
+ const auto * root = file_reader->metadata()->schema()->group_node();
 for (int i = 0; i < root->field_count(); ++i)
 {
- auto & node = root->field(i);
+ const auto & node = root->field(i);
 parquet_columns.emplace(node->name(), node);
 }
@@ -329,7 +329,7 @@ ParquetRecordReader::ParquetRecordReader(
 if (it == parquet_columns.end())
 throw Exception(ErrorCodes::PARQUET_EXCEPTION, "no column with '{}' in parquet file", col_with_name.name);
- auto node = it->second;
+ const auto & node = it->second;
 if (!node->is_primitive())
 throw Exception(ErrorCodes::NOT_IMPLEMENTED, "arrays and maps are not implemented in native parquet reader");
From 4b72748b2a1466a3c4947a321a0d5b536888ce63 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Fri, 17 May 2024 11:47:45 +0000
Subject: [PATCH 457/651] Better handling of illegal user input to avoid throwing unexpected CORRUPTED_DATA errors
---
 .../AggregateFunctionsArgMinArgMax.cpp | 4 ++--
 src/Dictionaries/SSDCacheDictionaryStorage.h | 7 +++----
 src/Interpreters/executeQuery.cpp | 11 ++++++++++-
 src/Server/HTTPHandler.cpp | 8 ++++----
 .../02477_single_value_data_string_regression.sql | 8 ++++----
 5 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp b/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp
index e8f40120152..9608ca26f37 100644
--- a/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp
@@ -14,7 +14,7 @@ struct Settings;
 namespace ErrorCodes
 {
-extern const int CORRUPTED_DATA;
+extern const int INCORRECT_DATA;
 extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 extern const int LOGICAL_ERROR;
 }
@@ -198,7 +198,7 @@ public:
 this->data(place).value().read(buf, *serialization_val, arena);
 if (unlikely(this->data(place).value().has() != this->data(place).result().has()))
 throw Exception(
- ErrorCodes::CORRUPTED_DATA,
+ ErrorCodes::INCORRECT_DATA,
 "Invalid state of the aggregate function {}: has_value ({}) != has_result ({})",
 getName(),
 this->data(place).value().has(),
diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h
index e3eea71cd9a..f0b56cbf529 100644
--- a/src/Dictionaries/SSDCacheDictionaryStorage.h
+++ b/src/Dictionaries/SSDCacheDictionaryStorage.h
@@ -721,11 +721,10 @@ public:
 if (!block.checkCheckSum())
 {
 std::string calculated_check_sum = std::to_string(block.calculateCheckSum());
- std::string check_sum = std::to_string(block.getCheckSum());
+ std::string expected_check_sum = std::to_string(block.getCheckSum());
 throw Exception(ErrorCodes::CORRUPTED_DATA,
- "Cache data corrupted. Checksum validation failed. Calculated {} in block {}",
- calculated_check_sum,
- check_sum);
+ "Cache data corrupted. Checksum validation failed. Calculated {} expected in block {}, in file {}",
+ calculated_check_sum, expected_check_sum, file_path);
 }
 func(blocks_to_fetch[block_to_fetch_index], block.getBlockData());
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index f1f72a4ea4a..cf0167bff17 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -1421,7 +1421,16 @@ void executeQuery(
 const char * begin;
 const char * end;
- istr.nextIfAtEnd();
+ try
+ {
+ istr.nextIfAtEnd();
+ }
+ catch (...)
+ {
+ /// If buffer contains invalid data and we failed to decompress, we still want to have some information about the query in the log.
+ logQuery("", context, /* internal = */ false, QueryProcessingStage::Complete); + throw; + } size_t max_query_size = context->getSettingsRef().max_query_size; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index a677c537622..d1db4cb3951 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -707,11 +707,11 @@ void HTTPHandler::processQuery( /// The data can also be compressed using incompatible internal algorithm. This is indicated by /// 'decompress' query parameter. std::unique_ptr in_post_maybe_compressed; - bool in_post_compressed = false; + bool is_in_post_compressed = false; if (params.getParsed("decompress", false)) { - in_post_maybe_compressed = std::make_unique(*in_post); - in_post_compressed = true; + in_post_maybe_compressed = std::make_unique(*in_post, /* allow_different_codecs_ = */ false, /* external_data_ = */ true); + is_in_post_compressed = true; } else in_post_maybe_compressed = std::move(in_post); @@ -845,7 +845,7 @@ void HTTPHandler::processQuery( /// If 'http_native_compression_disable_checksumming_on_decompress' setting is turned on, /// checksums of client data compressed with internal algorithm are not checked. - if (in_post_compressed && settings.http_native_compression_disable_checksumming_on_decompress) + if (is_in_post_compressed && settings.http_native_compression_disable_checksumming_on_decompress) static_cast(*in_post_maybe_compressed).disableChecksumming(); /// Add CORS header if 'add_http_cors_header' setting is turned on send * in Access-Control-Allow-Origin diff --git a/tests/queries/0_stateless/02477_single_value_data_string_regression.sql b/tests/queries/0_stateless/02477_single_value_data_string_regression.sql index 0f11a06f3fc..8499786f47a 100644 --- a/tests/queries/0_stateless/02477_single_value_data_string_regression.sql +++ b/tests/queries/0_stateless/02477_single_value_data_string_regression.sql @@ -103,11 +103,11 @@ SELECT '2^30-1', maxMerge(x) from (select CAST(unhex('ffffff3f') || randomString SELECT '1M without 0', length(maxMerge(x)) from (select CAST(unhex('00001000') || randomString(0x00100000 - 1) || 'x', 'AggregateFunction(max, String)') as x); SELECT '1M with 0', length(maxMerge(x)) from (select CAST(unhex('00001000') || randomString(0x00100000 - 1) || '\0', 'AggregateFunction(max, String)') as x); -SELECT 'fuzz1', finalizeAggregation(CAST(unhex('3000000\0303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353600010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); -- { serverError CORRUPTED_DATA } +SELECT 'fuzz1', finalizeAggregation(CAST(unhex('3000000\0303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353600010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); -- { serverError INCORRECT_DATA } SELECT 'fuzz2', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '01' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -SELECT 'fuzz3', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA } -SELECT 'fuzz4', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA } -SELECT 'fuzz5', finalizeAggregation(CAST(unhex('0100000000000000000FFFFFFFF0'), 'AggregateFunction(argMax, UInt64, String)')); -- { serverError 
CORRUPTED_DATA }
drop table if exists aggr;
From d1127bf119c9843050cc06093ea130feda6f6c5c Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 17 May 2024 12:09:46 +0000
Subject: [PATCH 458/651] Fix final=1 for Distributed over a non-MergeTree table.
---
 src/Analyzer/Passes/AutoFinalOnQueryPass.cpp | 2 +-
 src/Analyzer/QueryTreePassManager.cpp | 4 ++--
 .../02420_final_setting_analyzer.reference | 4 ++++
 tests/queries/0_stateless/02420_final_setting_analyzer.sql | 3 +++
 4 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp
index 9bd044dd89c..70aa1a41548 100644
--- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp
+++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp
@@ -42,7 +42,7 @@ private:
 return;
 const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage();
- bool is_final_supported = storage && storage->supportsFinal();
+ bool is_final_supported = storage && !storage->isRemote() && storage->supportsFinal();
 if (!is_final_supported)
 return;
diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp
index 51f1fb6cc2f..f7919b6422c 100644
--- a/src/Analyzer/QueryTreePassManager.cpp
+++ b/src/Analyzer/QueryTreePassManager.cpp
@@ -192,7 +192,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node)
 void QueryTreePassManager::runOnlyResolve(QueryTreeNodePtr query_tree_node)
 {
 // Run only QueryAnalysisPass and GroupingFunctionsResolvePass passes.
- run(query_tree_node, 2); + run(query_tree_node, 3); } void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pass_index) @@ -249,6 +249,7 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze) { manager.addPass(std::make_unique(only_analyze)); manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); @@ -294,7 +295,6 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze) manager.addPass(std::make_unique()); - manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.reference b/tests/queries/0_stateless/02420_final_setting_analyzer.reference index dd9fed65f13..780a6e5de68 100644 --- a/tests/queries/0_stateless/02420_final_setting_analyzer.reference +++ b/tests/queries/0_stateless/02420_final_setting_analyzer.reference @@ -132,3 +132,7 @@ SELECT * FROM merge_table ORDER BY id, val; 2 a 2 b 3 c +select sum(number) from numbers(10) settings final=1; +45 +select sum(number) from remote('127.0.0.{1,2}', numbers(10)) settings final=1; +90 diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.sql b/tests/queries/0_stateless/02420_final_setting_analyzer.sql index 14c832cfaf5..cbdec017602 100644 --- a/tests/queries/0_stateless/02420_final_setting_analyzer.sql +++ b/tests/queries/0_stateless/02420_final_setting_analyzer.sql @@ -102,3 +102,6 @@ insert into table_to_merge_c values (3,'c'); -- expected output: -- 1 c, 2 a, 2 b, 3 c SELECT * FROM merge_table ORDER BY id, val; + +select sum(number) from numbers(10) settings final=1; +select sum(number) from remote('127.0.0.{1,2}', numbers(10)) settings final=1; From c1fde88e6aeff4833a36bd508108a0307b07aa4e Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 11:06:24 +0200 Subject: [PATCH 459/651] Better error case handling --- .../copyAzureBlobStorageFile.cpp | 24 +++++++++---------- src/IO/S3/copyS3File.cpp | 24 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 7e3fb9e79bc..18fd3edffd8 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -124,23 +124,23 @@ namespace num_parts = (total_size + part_size - 1) / part_size; } - if (num_parts < 1 || num_parts > max_part_number || part_size < min_upload_part_size || part_size > max_upload_part_size) - { - String msg; - if (num_parts < 1) - msg = "Number of parts is zero"; - else if (num_parts > max_part_number) - msg = fmt::format("Number of parts exceeds {}", num_parts, max_part_number); - else if (part_size < min_upload_part_size) - msg = fmt::format("Size of a part is less than {}", part_size, min_upload_part_size); - else - msg = fmt::format("Size of a part exceeds {}", part_size, max_upload_part_size); + String error; + if (num_parts < 1) + error = "Number of parts is zero"; + else if (num_parts > max_part_number) + error = fmt::format("Number of parts exceeds {}/{}", num_parts, max_part_number); + else if (part_size < min_upload_part_size) + error = fmt::format("Size of a part is less than {}/{}", part_size, min_upload_part_size); + else if (part_size > max_upload_part_size) + error = fmt::format("Size of a part exceeds {}/{}", part_size, max_upload_part_size); + if (!error.empty()) 
+ { throw Exception( ErrorCodes::INVALID_CONFIG_PARAMETER, "{} while writing {} bytes to Azure. Check max_part_number = {}, " "min_upload_part_size = {}, max_upload_part_size = {}", - msg, total_size, max_part_number, min_upload_part_size, max_upload_part_size); + error, total_size, max_part_number, min_upload_part_size, max_upload_part_size); } /// We've calculated the size of a normal part (the final part can be smaller). diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 549d0a569c6..cff6fa5ad21 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -316,23 +316,23 @@ namespace num_parts = (total_size + part_size - 1) / part_size; } - if (num_parts < 1 || num_parts > max_part_number || part_size < min_upload_part_size || part_size > max_upload_part_size) - { - String msg; - if (num_parts < 1) - msg = "Number of parts is zero"; - else if (num_parts > max_part_number) - msg = fmt::format("Number of parts exceeds {}", num_parts, max_part_number); - else if (part_size < min_upload_part_size) - msg = fmt::format("Size of a part is less than {}", part_size, min_upload_part_size); - else - msg = fmt::format("Size of a part exceeds {}", part_size, max_upload_part_size); + String error; + if (num_parts < 1) + error = "Number of parts is zero"; + else if (num_parts > max_part_number) + error = fmt::format("Number of parts exceeds {}/{}", num_parts, max_part_number); + else if (part_size < min_upload_part_size) + error = fmt::format("Size of a part is less than {}/{}", part_size, min_upload_part_size); + else if (part_size > max_upload_part_size) + error = fmt::format("Size of a part exceeds {}/{}", part_size, max_upload_part_size); + if (!error.empty()) + { throw Exception( ErrorCodes::INVALID_CONFIG_PARAMETER, "{} while writing {} bytes to S3. Check max_part_number = {}, " "min_upload_part_size = {}, max_upload_part_size = {}", - msg, total_size, max_part_number, min_upload_part_size, max_upload_part_size); + error, total_size, max_part_number, min_upload_part_size, max_upload_part_size); } /// We've calculated the size of a normal part (the final part can be smaller). 
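The two mirrored hunks above (Azure and S3) converge on the same shape: build a single `error` string while the limits are checked, then throw once if it is non-empty, instead of re-testing the same conditions a second time inside the throw branch to rebuild the message. Below is a minimal standalone C++ sketch of that shape; the limit values are made-up placeholders and std::invalid_argument stands in for ClickHouse's Exception — both are assumptions for illustration, not the project's actual defaults or API.

    #include <cstdint>
    #include <stdexcept>
    #include <string>

    struct UploadLimits
    {
        uint64_t max_part_number = 10000;                          /// placeholder value
        uint64_t min_upload_part_size = 16 * 1024 * 1024;          /// placeholder value
        uint64_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; /// placeholder value
    };

    void checkPartLayout(uint64_t total_size, uint64_t part_size, uint64_t num_parts, const UploadLimits & limits)
    {
        /// Record the first failing check; every condition is evaluated exactly once.
        std::string error;
        if (num_parts < 1)
            error = "Number of parts is zero";
        else if (num_parts > limits.max_part_number)
            error = "Number of parts exceeds " + std::to_string(num_parts) + "/" + std::to_string(limits.max_part_number);
        else if (part_size < limits.min_upload_part_size)
            error = "Size of a part is less than " + std::to_string(part_size) + "/" + std::to_string(limits.min_upload_part_size);
        else if (part_size > limits.max_upload_part_size)
            error = "Size of a part exceeds " + std::to_string(part_size) + "/" + std::to_string(limits.max_upload_part_size);

        /// Single throw site, so the message cannot drift from the check that produced it.
        if (!error.empty())
            throw std::invalid_argument(error + " while writing " + std::to_string(total_size) + " bytes");
    }

Evaluating each condition once keeps the thrown message in sync with the check that failed; the replaced code tested the conditions twice, and its fmt strings also passed two arguments to a single "{}" placeholder, which the "{}/{}" form in the patch corrects.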
From e1caea6ab51d032fcba5e4356d7a4b5869e2eb9c Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 11:29:07 +0200 Subject: [PATCH 460/651] Split attached table count into attached tables, views and dictionaries --- programs/server/Server.cpp | 2 ++ src/Common/CurrentMetrics.cpp | 2 ++ src/Core/ServerSettings.h | 2 ++ src/Databases/DatabaseLazy.cpp | 24 +++++++++++++++++++++-- src/Databases/DatabasesCommon.cpp | 22 +++++++++++++++++++-- src/Interpreters/Context.cpp | 20 +++++++++++++++++++ src/Interpreters/Context.h | 2 ++ tests/config/config.d/max_num_to_warn.xml | 2 ++ 8 files changed, 72 insertions(+), 4 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9c9476d1aa7..223bc1f77e7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1476,6 +1476,8 @@ try global_context->setMaxTableSizeToDrop(new_server_settings.max_table_size_to_drop); global_context->setMaxPartitionSizeToDrop(new_server_settings.max_partition_size_to_drop); global_context->setMaxTableNumToWarn(new_server_settings.max_table_num_to_warn); + global_context->setMaxViewNumToWarn(new_server_settings.max_view_num_to_warn); + global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn); global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 21b4d114d79..b557edc3e12 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -224,6 +224,8 @@ M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \ M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \ M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \ + M(AttachedView, "Active view, used by current and upcoming SELECTs.") \ + M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \ M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \ M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \ M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \ diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 524d6ec07c2..af96ca3a557 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -97,6 +97,8 @@ namespace DB M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_view_num_to_warn, 5000lu, "If number of views is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_dictionary_num_to_warn, 5000lu, "If number of dictionaries is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ 
M(UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index fb1b3ee626b..ca985b5a7c8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "Common/CurrentMetrics.h" #include #include @@ -24,6 +25,8 @@ namespace fs = std::filesystem; namespace CurrentMetrics { extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; } @@ -184,7 +187,16 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::Metric metric; + if (table->isView()) { + metric = CurrentMetrics::AttachedView; + } else if (table->isDictionary()) { + metric = CurrentMetrics::AttachedDictionary; + } else { + metric = CurrentMetrics::AttachedTable; + } + CurrentMetrics::add(metric, 1); + } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -200,7 +212,15 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::Metric metric; + if (res->isView()) { + metric = CurrentMetrics::AttachedView; + } else if (res->isDictionary()) { + metric = CurrentMetrics::AttachedDictionary; + } else { + metric = CurrentMetrics::AttachedTable; + } + CurrentMetrics::sub(metric, 1); } return res; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fc75f8e44b9..ab7f2fff5aa 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -18,6 +18,8 @@ namespace CurrentMetrics { extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; } @@ -263,7 +265,15 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::Metric metric; + if (res->isView()) { + metric = CurrentMetrics::AttachedView; + } else if (res->isDictionary()) { + metric = CurrentMetrics::AttachedDictionary; + } else { + metric = CurrentMetrics::AttachedTable; + } + CurrentMetrics::sub(metric, 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -304,7 +314,15 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::Metric metric; + if (table->isView()) { + metric = CurrentMetrics::AttachedView; + } else if (table->isDictionary()) { + metric = CurrentMetrics::AttachedDictionary; + } else { + metric = CurrentMetrics::AttachedTable; + } + CurrentMetrics::add(metric, 1); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1bd9601dd7e..4c5df8ef4ea 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -160,6 +160,8 @@ namespace CurrentMetrics extern const Metric TablesLoaderForegroundThreadsScheduled; extern const Metric IOWriterThreadsScheduled; extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; extern const Metric AttachedDatabase; extern const Metric PartsActive; } @@ -359,6 +361,8 @@ struct ContextSharedPart : boost::noncopyable /// No lock required for format_schema_path modified only during initialization std::atomic_size_t max_database_num_to_warn = 1000lu; std::atomic_size_t max_table_num_to_warn = 5000lu; + std::atomic_size_t max_view_num_to_warn = 5000lu; + std::atomic_size_t max_dictionary_num_to_warn = 5000lu; std::atomic_size_t max_part_num_to_warn = 100000lu; String format_schema_path; /// Path to a directory that contains schema files used by input formats. String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types. @@ -935,6 +939,10 @@ Strings Context::getWarnings() const common_warnings = shared->warnings; if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast(shared->max_table_num_to_warn)) common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}", shared->max_table_num_to_warn)); + if (CurrentMetrics::get(CurrentMetrics::AttachedView) > static_cast(shared->max_view_num_to_warn)) + common_warnings.emplace_back(fmt::format("The number of attached views is more than {}", shared->max_view_num_to_warn)); + if (CurrentMetrics::get(CurrentMetrics::AttachedDictionary) > static_cast(shared->max_dictionary_num_to_warn)) + common_warnings.emplace_back(fmt::format("The number of attached dictionaries is more than {}", shared->max_dictionary_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast(shared->max_database_num_to_warn)) common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}", shared->max_database_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast(shared->max_part_num_to_warn)) @@ -3711,6 +3719,18 @@ void Context::setMaxTableNumToWarn(size_t max_table_to_warn) shared->max_table_num_to_warn= max_table_to_warn; } +void Context::setMaxViewNumToWarn(size_t max_view_to_warn) +{ + SharedLockGuard lock(shared->mutex); + shared->max_view_num_to_warn= max_view_to_warn; +} + +void Context::setMaxDictionaryNumToWarn(size_t max_dictionary_to_warn) +{ + SharedLockGuard lock(shared->mutex); + shared->max_dictionary_num_to_warn= max_dictionary_to_warn; +} + void Context::setMaxDatabaseNumToWarn(size_t max_database_to_warn) { SharedLockGuard lock(shared->mutex); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 7f663773e52..814534f7035 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -861,6 +861,8 @@ public: const HTTPHeaderFilter & getHTTPHeaderFilter() const; void setMaxTableNumToWarn(size_t 
max_table_to_warn); + void setMaxViewNumToWarn(size_t max_view_to_warn); + void setMaxDictionaryNumToWarn(size_t max_dictionary_to_warn); void setMaxDatabaseNumToWarn(size_t max_database_to_warn); void setMaxPartNumToWarn(size_t max_part_to_warn); /// The port that the server listens for executing SQL queries. diff --git a/tests/config/config.d/max_num_to_warn.xml b/tests/config/config.d/max_num_to_warn.xml index 776c270823d..1f55e6fd674 100644 --- a/tests/config/config.d/max_num_to_warn.xml +++ b/tests/config/config.d/max_num_to_warn.xml @@ -1,5 +1,7 @@ <clickhouse> <max_table_num_to_warn>5</max_table_num_to_warn> +<max_view_num_to_warn>5</max_view_num_to_warn> +<max_dictionary_num_to_warn>5</max_dictionary_num_to_warn> <max_database_num_to_warn>2</max_database_num_to_warn> <max_part_num_to_warn>10</max_part_num_to_warn> </clickhouse> From 311d6d6baa32ad0bdee1c58813c6d551aaeb53e0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 21 May 2024 09:38:36 +0000 Subject: [PATCH 461/651] Fix: 02124_insert_deduplication_token_multiple_blocks_replica --- .../02124_insert_deduplication_token_multiple_blocks_replica.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh b/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh index 1c776263f78..0c95abb9867 100755 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh @@ -9,6 +9,8 @@ INSERT_BLOCK_SETTINGS="max_insert_block_size=1&min_insert_block_size_rows=0&min_ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS block_dedup_token_replica SYNC" $CLICKHOUSE_CLIENT --query="CREATE TABLE block_dedup_token_replica (id Int32) ENGINE=ReplicatedMergeTree('/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{table}', '{replica}') ORDER BY id" +# Need to stop merges due to randomization of old_parts_lifetime setting, so all initial parts are guaranteed to exist when we check them +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES block_dedup_token_replica" $CLICKHOUSE_CLIENT --query="SELECT 'insert 2 blocks with dedup token, 1 row per block'" DEDUP_TOKEN='dedup1' From 0b3102576e6137d2783068fa7f325c96f57a37c2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 21 May 2024 11:43:16 +0200 Subject: [PATCH 462/651] One attempt to fix native copy for azure --- .../copyAzureBlobStorageFile.cpp | 21 +++++++--- .../test.py | 39 ++++++++++++++++++- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 667e63729ca..38eb97e17f4 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -300,21 +300,32 @@ void copyAzureBlobStorageFile( if (size < settings->max_single_part_copy_size) { + LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy blob sync {} -> {}", src_blob, dest_blob); block_blob_client_dest.CopyFromUri(source_uri); } else { Azure::Storage::Blobs::StartBlobCopyOperation operation = block_blob_client_dest.StartCopyFromUri(source_uri); - // Wait for the operation to finish, checking for status every 100 second.
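+ /// Wait for the server-side copy to finish, polling its status every 100 ms.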
auto copy_response = operation.PollUntilDone(std::chrono::milliseconds(100)); auto properties_model = copy_response.Value; - if (properties_model.CopySource.HasValue()) - { - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy failed"); - } + auto copy_status = properties_model.CopyStatus; + auto copy_status_description = properties_model.CopyStatusDescription; + + if (copy_status.HasValue() && copy_status.Value() == Azure::Storage::Blobs::Models::CopyStatus::Success) + { + LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); + } + else + { + if (copy_status.HasValue()) + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} failed with status {} description {} (operation is done {})", + src_blob, dest_blob, copy_status.Value().ToString(), copy_status_description.Value(), operation.IsDone()); + else + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} didn't complete with sucess status (operation is done {})", src_blob, dest_blob, operation.IsDone()); + } } } else diff --git a/tests/integration/test_azure_blob_storage_native_copy/test.py b/tests/integration/test_azure_blob_storage_native_copy/test.py index a47688d0713..3169696a4d7 100644 --- a/tests/integration/test_azure_blob_storage_native_copy/test.py +++ b/tests/integration/test_azure_blob_storage_native_copy/test.py @@ -110,6 +110,11 @@ def cluster(): main_configs=[path], with_azurite=True, ) + cluster.add_instance( + "node3", + main_configs=[path], + with_azurite=True, + ) cluster.start() yield cluster @@ -182,7 +187,6 @@ def test_backup_restore_on_merge_tree_same_container(cluster): azure_query(node1, f"DROP TABLE test_simple_merge_tree_restored") - def test_backup_restore_on_merge_tree_different_container(cluster): node2 = cluster.instances["node2"] azure_query( @@ -217,3 +221,36 @@ def test_backup_restore_on_merge_tree_different_container(cluster): azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket") azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket_restored") + + +def test_backup_restore_on_merge_tree_native_copy_async(cluster): + node3 = cluster.instances["node3"] + azure_query( + node3, + f"CREATE TABLE test_simple_merge_tree_async(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_cache'", + ) + azure_query(node3, f"INSERT INTO test_simple_merge_tree_async VALUES (1, 'a')") + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_async_backup')" + print("BACKUP DEST", backup_destination) + azure_query( + node3, + f"BACKUP TABLE test_simple_merge_tree_async TO {backup_destination}", + settings={"azure_max_single_part_copy_size": 0} + ) + + assert node3.contains_in_log("using native copy") + + azure_query( + node3, + f"RESTORE TABLE test_simple_merge_tree_async AS test_simple_merge_tree_async_restored FROM {backup_destination};", + settings={"azure_max_single_part_copy_size": 0} + ) + assert ( + azure_query(node3, f"SELECT * from test_simple_merge_tree_async_restored") == "1\ta\n" + ) + + assert node3.contains_in_log("using native copy") + + azure_query(node3, f"DROP TABLE test_simple_merge_tree_async") + azure_query(node3, f"DROP TABLE test_simple_merge_tree_async_restored") From a6dd13814cfcc14d8f56d1c7ec142f109d01c369 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 21 May 2024 09:53:22 +0000 Subject: [PATCH 463/651] Automatic 
style fix --- .../test_azure_blob_storage_native_copy/test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_azure_blob_storage_native_copy/test.py b/tests/integration/test_azure_blob_storage_native_copy/test.py index 3169696a4d7..77d400240b1 100644 --- a/tests/integration/test_azure_blob_storage_native_copy/test.py +++ b/tests/integration/test_azure_blob_storage_native_copy/test.py @@ -236,7 +236,7 @@ def test_backup_restore_on_merge_tree_native_copy_async(cluster): azure_query( node3, f"BACKUP TABLE test_simple_merge_tree_async TO {backup_destination}", - settings={"azure_max_single_part_copy_size": 0} + settings={"azure_max_single_part_copy_size": 0}, ) assert node3.contains_in_log("using native copy") @@ -244,10 +244,11 @@ def test_backup_restore_on_merge_tree_native_copy_async(cluster): azure_query( node3, f"RESTORE TABLE test_simple_merge_tree_async AS test_simple_merge_tree_async_restored FROM {backup_destination};", - settings={"azure_max_single_part_copy_size": 0} + settings={"azure_max_single_part_copy_size": 0}, ) assert ( - azure_query(node3, f"SELECT * from test_simple_merge_tree_async_restored") == "1\ta\n" + azure_query(node3, f"SELECT * from test_simple_merge_tree_async_restored") + == "1\ta\n" ) assert node3.contains_in_log("using native copy") From 86c77b998ddc332a476d348639f05f7b63ee51e5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 May 2024 12:49:57 +0200 Subject: [PATCH 464/651] Fix global query profiler Signed-off-by: Azat Khuzhin --- src/Interpreters/ThreadStatusExt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 6607df8d9af..9ca521a4ab3 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -462,8 +462,8 @@ void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_re { #if !defined(SANITIZER) && !defined(__APPLE__) /// profilers are useless without trace collector - auto global_context_ptr = global_context.lock(); - if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) + auto context = Context::getGlobalContextInstance(); + if (!context->hasTraceCollector()) return; try From ef182b035679a4f88968441b9c9ebeb6b6195b06 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 21 May 2024 12:54:46 +0200 Subject: [PATCH 465/651] Fix --- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 38eb97e17f4..207b3cde868 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -324,7 +324,7 @@ void copyAzureBlobStorageFile( throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} failed with status {} description {} (operation is done {})", src_blob, dest_blob, copy_status.Value().ToString(), copy_status_description.Value(), operation.IsDone()); else - throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} didn't complete with sucess status (operation is done {})", src_blob, dest_blob, operation.IsDone()); + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Copy from {} to {} didn't complete with success status (operation is done {})", src_blob, dest_blob, operation.IsDone()); } } } From e1fef7ecd77da0b1eaed4b0dbc7a73b36cd228ac Mon Sep 17 00:00:00 2001 From: 
Alexander Gololobov Date: Tue, 21 May 2024 12:54:46 +0200 Subject: [PATCH 466/651] Group const fields --- src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp | 4 ++-- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index e01572715d6..b3e33e94073 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -56,14 +56,14 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeIndexGranularity & index_granularity_) : data_part_name(data_part_name_) , serializations(serializations_) - , data_part_storage(data_part_storage_) , index_granularity_info(index_granularity_info_) , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) , columns_list(columns_list_) , settings(settings_) - , index_granularity(index_granularity_) , with_final_mark(settings.can_use_adaptive_granularity) + , data_part_storage(data_part_storage_) + , index_granularity(index_granularity_) { } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 3245a23339b..d2bf03483c9 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -50,19 +50,19 @@ protected: IDataPartStorage & getDataPartStorage() { return *data_part_storage; } - /// Serializations for every columns and subcolumns by their names. const String data_part_name; + /// Serializations for every columns and subcolumns by their names. const SerializationByName serializations; - MutableDataPartStoragePtr data_part_storage; const MergeTreeIndexGranularityInfo index_granularity_info; const MergeTreeSettingsPtr storage_settings; const StorageMetadataPtr metadata_snapshot; const NamesAndTypesList columns_list; const MergeTreeWriterSettings settings; - MergeTreeIndexGranularity index_granularity; const bool with_final_mark; + MutableDataPartStoragePtr data_part_storage; MutableColumns index_columns; + MergeTreeIndexGranularity index_granularity; }; using MergeTreeDataPartWriterPtr = std::unique_ptr; From ffa38ecd8bbabff099e1bfb916b4699c9fde1054 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 21 May 2024 13:28:20 +0200 Subject: [PATCH 467/651] Cleanups --- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 - src/Storages/MergeTree/MergeTreeDataPartCompact.cpp | 7 ++++--- src/Storages/MergeTree/MergeTreeDataPartCompact.h | 1 - src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 9 ++++++--- src/Storages/MergeTree/MergeTreeDataPartWide.h | 1 - src/Storages/MergeTree/MergedBlockOutputStream.cpp | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 091a7ceb5bd..f4889d64179 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -104,7 +104,6 @@ public: const ValueSizeMap & avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; -// TODO: remove? 
virtual bool isStoredOnDisk() const = 0; virtual bool isStoredOnRemoteDisk() const = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 373ad6c23ea..fb1c2fe35ed 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -74,9 +74,10 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( //// { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); //// return std::make_unique( - data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, - marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, + indices_to_recalc, stats_to_recalc_, marks_file_extension_, + default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index ca88edba7b3..1fb84424774 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -40,7 +40,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; -// TODO: remove? bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 34a3f30c4ba..74cab30064a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -69,9 +69,12 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { - return std::make_unique(data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, - marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + return std::make_unique( + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, + metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, + default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index e3cb3f04335..7465e08b7c4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -35,7 +35,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; -// TODO: remove? 
bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index e0fb4f703a0..0fe3ee30a0d 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -40,7 +40,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( /* rewrite_primary_key = */ true, blocks_are_granules_size); -// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart + /// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart if (data_part->isStoredOnDisk()) data_part_storage->createDirectories(); From 8fc1abf2ab06485d0c4c63d6a0a2484189f71f84 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 15:51:24 +0200 Subject: [PATCH 468/651] Add documentation of new settings --- .../settings.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 28831404a1f..4d239309886 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -561,6 +561,25 @@ Default value: 5000 <max_table_num_to_warn>400</max_table_num_to_warn> ``` +## max\_view\_num\_to\_warn {#max-view-num-to-warn} +If the number of attached views exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 5000 + +**Example** + +``` xml +<max_view_num_to_warn>400</max_view_num_to_warn> +``` + +## max\_dictionary\_num\_to\_warn {#max-dictionary-num-to-warn} +If the number of attached dictionaries exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 5000 + +**Example** + +``` xml +<max_dictionary_num_to_warn>400</max_dictionary_num_to_warn> +``` ## max\_part\_num\_to\_warn {#max-part-num-to-warn} If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
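These warning thresholds are checked against the `CurrentMetrics` gauges that the attach and detach paths earlier in the series adjust by hand. Manual add/sub means both paths have to classify a given storage object identically, otherwise the gauge drifts and the warnings fire spuriously. A minimal, self-contained sketch of that invariant; the names here are illustrative, not ClickHouse's actual API:

```cpp
#include <atomic>
#include <cassert>

// Stand-ins for the AttachedTable/AttachedView/AttachedDictionary gauges.
std::atomic<long> attached_tables{0};
std::atomic<long> attached_views{0};
std::atomic<long> attached_dictionaries{0};

struct Storage
{
    bool is_view = false;
    bool is_dictionary = false;
};

/// One classifier shared by attach and detach, so both paths always touch
/// the same gauge for a given storage object.
std::atomic<long> & gaugeFor(const Storage & storage)
{
    if (storage.is_view)
        return attached_views;
    if (storage.is_dictionary)
        return attached_dictionaries;
    return attached_tables;
}

void attach(const Storage & storage) { gaugeFor(storage).fetch_add(1); }
void detach(const Storage & storage) { gaugeFor(storage).fetch_sub(1); }

int main()
{
    Storage view;
    view.is_view = true;

    attach(view);
    detach(view);

    // With a single shared classifier every gauge returns to zero; duplicating
    // the if/else chain on each path is where drift bugs creep in.
    assert(attached_views.load() == 0);
    return 0;
}
```

Centralizing the classification in one helper, as the patch below does, is the simplest way to keep the two paths in lockstep.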
From 681de0145888b4dd30d75fd9b1fabe5e2e084b10 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 16:00:51 +0200 Subject: [PATCH 469/651] Extract common counter logic to method --- src/Databases/DatabaseLazy.cpp | 31 ++++++++++++------------------- src/Databases/DatabasesCommon.cpp | 31 +++++++++++++------------------ 2 files changed, 25 insertions(+), 37 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index ca985b5a7c8..a27e69c7e63 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -174,6 +174,16 @@ bool DatabaseLazy::empty() const return tables_cache.empty(); } +static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) { + if (storage->isView()) { + return CurrentMetrics::AttachedView; + } else if (storage->isDictionary()) { + return CurrentMetrics::AttachedDictionary; + } else { + return CurrentMetrics::AttachedTable; + } +} + void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &) { LOG_DEBUG(log, "Attach table {}.", backQuote(table_name)); @@ -187,16 +197,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::Metric metric; - if (table->isView()) { - metric = CurrentMetrics::AttachedView; - } else if (table->isDictionary()) { - metric = CurrentMetrics::AttachedDictionary; - } else { - metric = CurrentMetrics::AttachedTable; - } - CurrentMetrics::add(metric, 1); - + CurrentMetrics::add(get_attached_count_metric_for_storage(table), 1); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -212,15 +213,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - CurrentMetrics::Metric metric; - if (res->isView()) { - metric = CurrentMetrics::AttachedView; - } else if (res->isDictionary()) { - metric = CurrentMetrics::AttachedDictionary; - } else { - metric = CurrentMetrics::AttachedTable; - } - CurrentMetrics::sub(metric, 1); + CurrentMetrics::sub(get_attached_count_metric_for_storage(res), 1); } return res; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index ab7f2fff5aa..03a8feb845f 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -254,6 +254,17 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, con return detachTableUnlocked(table_name); } + +static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) { + if (storage->isView()) { + return CurrentMetrics::AttachedView; + } else if (storage->isDictionary()) { + return CurrentMetrics::AttachedDictionary; + } else { + return CurrentMetrics::AttachedTable; + } +} + StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_name) { StoragePtr res; @@ -265,15 +276,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - 
CurrentMetrics::Metric metric; - if (res->isView()) { - metric = CurrentMetrics::AttachedView; - } else if (res->isDictionary()) { - metric = CurrentMetrics::AttachedDictionary; - } else { - metric = CurrentMetrics::AttachedTable; - } - CurrentMetrics::sub(metric, 1); + CurrentMetrics::sub(get_attached_count_metric_for_storage(res), 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -314,15 +317,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. table->is_detached = false; - CurrentMetrics::Metric metric; - if (table->isView()) { - metric = CurrentMetrics::AttachedView; - } else if (table->isDictionary()) { - metric = CurrentMetrics::AttachedDictionary; - } else { - metric = CurrentMetrics::AttachedTable; - } - CurrentMetrics::add(metric, 1); + CurrentMetrics::add(get_attached_count_metric_for_storage(table), 1); } void DatabaseWithOwnTablesBase::shutdown() From 98b89323c8239ce71153f88f6232806993b1a411 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 21 May 2024 16:14:48 +0200 Subject: [PATCH 470/651] Pass virtual columns descriptions to writer --- .../MergeTree/IMergeTreeDataPartWriter.cpp | 16 ++++++++++------ .../MergeTree/IMergeTreeDataPartWriter.h | 4 ++++ .../MergeTree/MergeTreeDataPartCompact.cpp | 3 ++- src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 3 ++- .../MergeTree/MergeTreeDataPartWriterCompact.cpp | 3 ++- .../MergeTree/MergeTreeDataPartWriterCompact.h | 1 + .../MergeTree/MergeTreeDataPartWriterOnDisk.cpp | 3 ++- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 1 + .../MergeTree/MergeTreeDataPartWriterWide.cpp | 3 ++- .../MergeTree/MergeTreeDataPartWriterWide.h | 1 + .../MergeTree/MergedBlockOutputStream.cpp | 3 ++- .../MergeTree/MergedColumnOnlyOutputStream.cpp | 1 + 12 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index b3e33e94073..27da53de9b0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -52,6 +52,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) : data_part_name(data_part_name_) @@ -59,6 +60,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( , index_granularity_info(index_granularity_info_) , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) + , virtual_columns(virtual_columns_) , columns_list(columns_list_) , settings(settings_) , with_final_mark(settings.can_use_adaptive_granularity) @@ -95,10 +97,9 @@ ASTPtr IMergeTreeDataPartWriter::getCodecDescOrDefault(const String & column_nam if (const auto * column_desc = columns.tryGet(column_name)) return get_codec_or_default(*column_desc); -///// TODO: is this needed? 
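The payoff shows up in `getCodecDescOrDefault` below: with the virtuals description threaded into the writer, a codec lookup can fall through the ordinary columns, then the virtual columns, and only then settle on the part's default codec. A reduced sketch of that fallback chain, with types and names simplified for illustration rather than the real writer interfaces:

```cpp
#include <map>
#include <optional>
#include <string>

using Codec = std::string;

struct ColumnDesc
{
    std::optional<Codec> codec; // a column may or may not declare its own codec
};

Codec codecOrDefault(
    const std::string & column_name,
    const std::map<std::string, ColumnDesc> & columns,
    const std::map<std::string, ColumnDesc> & virtuals,
    const Codec & default_codec)
{
    // Prefer a codec declared on the ordinary column...
    if (auto it = columns.find(column_name); it != columns.end() && it->second.codec)
        return *it->second.codec;
    // ...then one declared on a virtual column, which is what passing the
    // virtuals description into the writer enables...
    if (auto it = virtuals.find(column_name); it != virtuals.end() && it->second.codec)
        return *it->second.codec;
    // ...and fall back to the part's default codec otherwise.
    return default_codec;
}
```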
-// if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) -// return get_codec_or_default(*virtual_desc); -// + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + return default_codec->getFullCodecDesc(); } @@ -115,6 +116,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -131,6 +133,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -149,6 +152,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -158,11 +162,11 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( { if (part_type == MergeTreeDataPartType::Compact) return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); else if (part_type == MergeTreeDataPartType::Wide) return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString()); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index d2bf03483c9..5dcc7ddc599 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -29,6 +30,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_ = {}); @@ -56,6 +58,7 @@ protected: const MergeTreeIndexGranularityInfo index_granularity_info; const MergeTreeSettingsPtr storage_settings; const StorageMetadataPtr metadata_snapshot; + const VirtualsDescriptionPtr virtual_columns; const NamesAndTypesList columns_list; const MergeTreeWriterSettings 
settings; const bool with_final_mark; @@ -77,6 +80,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index fb1c2fe35ed..332b7d04f7f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -56,6 +56,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -75,7 +76,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( //// return std::make_unique( data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 74cab30064a..d4630d3dd3f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -62,6 +62,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -72,7 +73,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( return std::make_unique( data_part_name_, logger_name_, serializations_, data_part_storage_, index_granularity_info_, storage_settings_, columns_list, - metadata_snapshot, indices_to_recalc, stats_to_recalc_, + metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 3f08d8eea21..328e3118ba9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -18,6 +18,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc, const String & marks_file_extension_, @@ -27,7 +28,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( : MergeTreeDataPartWriterOnDisk( data_part_name_, logger_name_, serializations_, data_part_storage_, index_granularity_info_, storage_settings_, - columns_list_, 
metadata_snapshot_, + columns_list_, metadata_snapshot_, virtual_columns_, indices_to_recalc_, stats_to_recalc, marks_file_extension_, default_codec_, settings_, index_granularity_) , plain_file(getDataPartStorage().writeFile( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 03804ff4966..f62f060fde2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -19,6 +19,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 25eb83a82c0..30f01c1acd6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -148,6 +148,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const MergeTreeIndices & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -156,7 +157,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const MergeTreeIndexGranularity & index_granularity_) : IMergeTreeDataPartWriter( data_part_name_, serializations_, data_part_storage_, index_granularity_info_, - storage_settings_, columns_list_, metadata_snapshot_, settings_, index_granularity_) + storage_settings_, columns_list_, metadata_snapshot_, virtual_columns_, settings_, index_granularity_) , skip_indices(indices_to_recalc_) , stats(stats_to_recalc_) , marks_file_extension(marks_file_extension_) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index e17724fa1d0..a60fcd43a58 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -109,6 +109,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a57bf7d2037..001f09b81b3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -84,6 +84,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -93,7 +94,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( : MergeTreeDataPartWriterOnDisk( data_part_name_, logger_name_, serializations_, data_part_storage_, index_granularity_info_, storage_settings_, - 
columns_list_, metadata_snapshot_, + columns_list_, metadata_snapshot_, virtual_columns_, indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 5789213c910..8dc488788c6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -29,6 +29,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 0fe3ee30a0d..5ef967d930a 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -55,7 +55,8 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), data_part_storage, data_part->index_granularity_info, storage_settings, - columns_list, metadata_snapshot, skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); + columns_list, metadata_snapshot, data_part->storage.getVirtualsPtr(), + skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); } /// If data is pre-sorted. diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 1c75d81eca5..1d1783b1b43 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -39,6 +39,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( storage_settings, header.getNamesAndTypesList(), metadata_snapshot_, + data_part->storage.getVirtualsPtr(), indices_to_recalc, stats_to_recalc_, data_part->getMarksFileExtension(), From 372acbd3fcbb06d9cd650b785b99da346d6ce5c9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 21 May 2024 14:15:14 +0000 Subject: [PATCH 471/651] Refactor aliases a bit. 
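The centerpiece is the new `ScopeAliases::find` below, which chases `x AS y` style transitive aliases until the name reaches a concrete expression, function, or table binding. A toy model of that lookup loop over plain strings; illustrative only, not the analyzer's real types:

```cpp
#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

using AliasMap = std::unordered_map<std::string, std::string>;

/// bindings: alias -> resolved node (modeled as a plain string here).
/// transitive: alias -> another alias, as recorded for `a AS b`.
std::optional<std::string> findAlias(std::string key, const AliasMap & bindings, const AliasMap & transitive)
{
    auto it = bindings.find(key);
    while (it == bindings.end())
    {
        auto jt = transitive.find(key);
        if (jt == transitive.end())
            return std::nullopt; // neither bound nor forwarded anywhere
        key = jt->second;        // follow the `x AS y` link and retry
        it = bindings.find(key);
    }
    return it->second;
}

int main()
{
    AliasMap bindings{{"a", "column `id`"}};
    AliasMap transitive{{"b", "a"}, {"c", "b"}};

    // `c` resolves through `b` and `a` to the concrete binding.
    if (auto node = findAlias("c", bindings, transitive))
        std::cout << *node << '\n'; // prints: column `id`
    return 0;
}
```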
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 314 +++++++++++------- .../02341_analyzer_aliases_basics.reference | 1 + .../02341_analyzer_aliases_basics.sql | 2 + .../0_stateless/02343_analyzer_lambdas.sql | 8 + 4 files changed, 204 insertions(+), 121 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 10f2290b34f..e50ad7911a0 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -471,7 +471,6 @@ struct TableExpressionData return buffer.str(); } }; - class ExpressionsStack { public: @@ -586,6 +585,82 @@ private: std::unordered_map alias_name_to_expressions; }; +struct ScopeAliases +{ + /// Alias name to query expression node + std::unordered_map alias_name_to_expression_node_before_group_by; + std::unordered_map alias_name_to_expression_node_after_group_by; + + std::unordered_map * alias_name_to_expression_node = nullptr; + + /// Alias name to lambda node + std::unordered_map alias_name_to_lambda_node; + + /// Alias name to table expression node + std::unordered_map alias_name_to_table_expression_node; + + /// Expressions like `x as y` where we can't say whether it's a function, expression or table. + std::unordered_map transitive_aliases; + + /// Nodes with duplicated aliases + std::unordered_set nodes_with_duplicated_aliases; + std::vector cloned_nodes_with_duplicated_aliases; + + std::unordered_map & getAliasMap(IdentifierLookupContext lookup_context) + { + switch (lookup_context) + { + case IdentifierLookupContext::EXPRESSION: return *alias_name_to_expression_node; + case IdentifierLookupContext::FUNCTION: return alias_name_to_lambda_node; + case IdentifierLookupContext::TABLE_EXPRESSION: return alias_name_to_table_expression_node; + } + + __builtin_unreachable(); + } + + enum class FindOption + { + FIRST_NAME, + FULL_NAME, + }; + + const std::string & getKey(const Identifier & identifier, FindOption find_option) + { + switch (find_option) + { + case FindOption::FIRST_NAME: return identifier.front(); + case FindOption::FULL_NAME: return identifier.getFullName(); + } + + __builtin_unreachable(); + } + + QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) + { + auto & alias_map = getAliasMap(lookup.lookup_context); + const std::string * key = &getKey(lookup.identifier, find_option); + + auto it = alias_map.find(*key); + while (it == alias_map.end()) + { + auto jt = transitive_aliases.find(*key); + if (jt == transitive_aliases.end()) + return {}; + + key = &(getKey(jt->second, find_option)); + it = alias_map.find(*key); + } + + return &it->second; + } + + const QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) const + { + return const_cast(this)->find(lookup, find_option); + } +}; + + /** Projection names is name of query tree node that is used in projection part of query node. 
* Example: SELECT id FROM test_table; * `id` is projection name of column node @@ -731,7 +806,7 @@ struct IdentifierResolveScope else if (parent_scope) join_use_nulls = parent_scope->join_use_nulls; - alias_name_to_expression_node = &alias_name_to_expression_node_before_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by; } QueryTreeNodePtr scope_node; @@ -746,17 +821,7 @@ struct IdentifierResolveScope /// Argument can be expression like constant, column, function or table expression std::unordered_map expression_argument_name_to_node; - /// Alias name to query expression node - std::unordered_map alias_name_to_expression_node_before_group_by; - std::unordered_map alias_name_to_expression_node_after_group_by; - - std::unordered_map * alias_name_to_expression_node = nullptr; - - /// Alias name to lambda node - std::unordered_map alias_name_to_lambda_node; - - /// Alias name to table expression node - std::unordered_map alias_name_to_table_expression_node; + ScopeAliases aliases; /// Table column name to column node. Valid only during table ALIAS columns resolve. ColumnNameToColumnNodeMap column_name_to_column_node; @@ -767,10 +832,6 @@ struct IdentifierResolveScope /// Window name to window node std::unordered_map window_name_to_window_node; - /// Nodes with duplicated aliases - std::unordered_set nodes_with_duplicated_aliases; - std::vector cloned_nodes_with_duplicated_aliases; - /// Current scope expression in resolve process stack ExpressionsStack expressions_in_resolve_process_stack; @@ -889,7 +950,7 @@ struct IdentifierResolveScope bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction(); expressions_in_resolve_process_stack.push(node); if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction()) - alias_name_to_expression_node = &alias_name_to_expression_node_before_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by; } void popExpressionNode() @@ -897,7 +958,7 @@ struct IdentifierResolveScope bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction(); expressions_in_resolve_process_stack.pop(); if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction()) - alias_name_to_expression_node = &alias_name_to_expression_node_after_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_after_group_by; } /// Dump identifier resolve scope @@ -916,16 +977,16 @@ struct IdentifierResolveScope for (const auto & [alias_name, node] : expression_argument_name_to_node) buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Alias name to expression node table size " << alias_name_to_expression_node->size() << '\n'; - for (const auto & [alias_name, node] : *alias_name_to_expression_node) + buffer << "Alias name to expression node table size " << aliases.alias_name_to_expression_node->size() << '\n'; + for (const auto & [alias_name, node] : *aliases.alias_name_to_expression_node) buffer << "Alias name " << alias_name << " expression node " << node->dumpTree() << '\n'; - buffer << "Alias name to function node table size " << alias_name_to_lambda_node.size() << '\n'; - for (const auto & [alias_name, node] : alias_name_to_lambda_node) + buffer << "Alias name to function node table size " << aliases.alias_name_to_lambda_node.size() << '\n'; + for 
(const auto & [alias_name, node] : aliases.alias_name_to_lambda_node) buffer << "Alias name " << alias_name << " lambda node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Alias name to table expression node table size " << alias_name_to_table_expression_node.size() << '\n'; - for (const auto & [alias_name, node] : alias_name_to_table_expression_node) + buffer << "Alias name to table expression node table size " << aliases.alias_name_to_table_expression_node.size() << '\n'; + for (const auto & [alias_name, node] : aliases.alias_name_to_table_expression_node) buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; buffer << "CTE name to query node table size " << cte_name_to_query_node.size() << '\n'; @@ -936,8 +997,8 @@ struct IdentifierResolveScope for (const auto & [window_name, node] : window_name_to_window_node) buffer << "CTE name " << window_name << " node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Nodes with duplicated aliases size " << nodes_with_duplicated_aliases.size() << '\n'; - for (const auto & node : nodes_with_duplicated_aliases) + buffer << "Nodes with duplicated aliases size " << aliases.nodes_with_duplicated_aliases.size() << '\n'; + for (const auto & node : aliases.nodes_with_duplicated_aliases) buffer << "Alias name " << node->getAlias() << " node " << node->formatASTForErrorMessage() << '\n'; buffer << "Expression resolve process stack " << '\n'; @@ -996,8 +1057,8 @@ struct IdentifierResolveScope class QueryExpressionsAliasVisitor : public InDepthQueryTreeVisitor { public: - explicit QueryExpressionsAliasVisitor(IdentifierResolveScope & scope_) - : scope(scope_) + explicit QueryExpressionsAliasVisitor(ScopeAliases & aliases_) + : aliases(aliases_) {} void visitImpl(QueryTreeNodePtr & node) @@ -1034,10 +1095,10 @@ public: private: void addDuplicatingAlias(const QueryTreeNodePtr & node) { - scope.nodes_with_duplicated_aliases.emplace(node); + aliases.nodes_with_duplicated_aliases.emplace(node); auto cloned_node = node->clone(); - scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); - scope.nodes_with_duplicated_aliases.emplace(cloned_node); + aliases.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); + aliases.nodes_with_duplicated_aliases.emplace(cloned_node); } void updateAliasesIfNeeded(const QueryTreeNodePtr & node, bool is_lambda_node) @@ -1053,25 +1114,29 @@ private: if (is_lambda_node) { - if (scope.alias_name_to_expression_node->contains(alias)) + if (aliases.alias_name_to_expression_node->contains(alias)) addDuplicatingAlias(node); - auto [_, inserted] = scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); + auto [_, inserted] = aliases.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); if (!inserted) addDuplicatingAlias(node); return; } - if (scope.alias_name_to_lambda_node.contains(alias)) - addDuplicatingAlias(node); + if (aliases.alias_name_to_lambda_node.contains(alias)) + addDuplicatingAlias(node); - auto [_, inserted] = scope.alias_name_to_expression_node->insert(std::make_pair(alias, node)); + auto [_, inserted] = aliases.alias_name_to_expression_node->insert(std::make_pair(alias, node)); if (!inserted) - addDuplicatingAlias(node); + addDuplicatingAlias(node); + + /// If node is identifier put it into transitive aliases map. 
+ if (const auto * identifier = typeid_cast(node.get())) + aliases.transitive_aliases.insert(std::make_pair(alias, identifier->getIdentifier())); } - IdentifierResolveScope & scope; + ScopeAliases & aliases; }; class TableExpressionsAliasVisitor : public InDepthQueryTreeVisitor @@ -1118,7 +1183,7 @@ private: return; const auto & node_alias = node->getAlias(); - auto [_, inserted] = scope.alias_name_to_table_expression_node.emplace(node_alias, node); + auto [_, inserted] = scope.aliases.alias_name_to_table_expression_node.emplace(node_alias, node); if (!inserted) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "Multiple table expressions with same alias {}. In scope {}", @@ -1189,7 +1254,7 @@ public: } case QueryTreeNodeType::TABLE_FUNCTION: { - QueryExpressionsAliasVisitor expressions_alias_visitor(scope); + QueryExpressionsAliasVisitor expressions_alias_visitor(scope.aliases); resolveTableFunction(node, scope, expressions_alias_visitor, false /*nested_table_function*/); break; } @@ -1864,7 +1929,7 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( if (allow_expression_identifiers) { - for (const auto & [name, expression] : *scope.alias_name_to_expression_node) + for (const auto & [name, expression] : *scope.aliases.alias_name_to_expression_node) { assert(expression); auto expression_identifier = Identifier(name); @@ -1894,13 +1959,13 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( { if (allow_function_identifiers) { - for (const auto & [name, _] : *scope.alias_name_to_expression_node) + for (const auto & [name, _] : *scope.aliases.alias_name_to_expression_node) valid_identifiers_result.insert(Identifier(name)); } if (allow_table_expression_identifiers) { - for (const auto & [name, _] : scope.alias_name_to_table_expression_node) + for (const auto & [name, _] : scope.aliases.alias_name_to_table_expression_node) valid_identifiers_result.insert(Identifier(name)); } } @@ -2789,21 +2854,22 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(cons bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope) { - const auto & identifier_bind_part = identifier_lookup.identifier.front(); + //const auto & identifier_bind_part = identifier_lookup.identifier.front(); + return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr; - auto get_alias_name_to_node_map = [&]() -> const std::unordered_map & - { - if (identifier_lookup.isExpressionLookup()) - return *scope.alias_name_to_expression_node; - else if (identifier_lookup.isFunctionLookup()) - return scope.alias_name_to_lambda_node; + // auto get_alias_name_to_node_map = [&]() -> const std::unordered_map & + // { + // if (identifier_lookup.isExpressionLookup()) + // return *scope.alias_name_to_expression_node; + // else if (identifier_lookup.isFunctionLookup()) + // return scope.alias_name_to_lambda_node; - return scope.alias_name_to_table_expression_node; - }; + // return scope.alias_name_to_table_expression_node; + // }; - const auto & alias_name_to_node_map = get_alias_name_to_node_map(); + // const auto & alias_name_to_node_map = get_alias_name_to_node_map(); - return alias_name_to_node_map.contains(identifier_bind_part); + // return alias_name_to_node_map.contains(identifier_bind_part); } /** Resolve identifier from scope aliases. 
@@ -2853,23 +2919,29 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier { const auto & identifier_bind_part = identifier_lookup.identifier.front(); - auto get_alias_name_to_node_map = [&]() -> std::unordered_map & - { - if (identifier_lookup.isExpressionLookup()) - return *scope.alias_name_to_expression_node; - else if (identifier_lookup.isFunctionLookup()) - return scope.alias_name_to_lambda_node; + // auto get_alias_name_to_node_map = [&]() -> std::unordered_map & + // { + // if (identifier_lookup.isExpressionLookup()) + // return *scope.alias_name_to_expression_node; + // else if (identifier_lookup.isFunctionLookup()) + // return scope.alias_name_to_lambda_node; - return scope.alias_name_to_table_expression_node; - }; + // return scope.alias_name_to_table_expression_node; + // }; - auto & alias_name_to_node_map = get_alias_name_to_node_map(); - auto it = alias_name_to_node_map.find(identifier_bind_part); + // auto & alias_name_to_node_map = get_alias_name_to_node_map(); + // auto it = alias_name_to_node_map.find(identifier_bind_part); - if (it == alias_name_to_node_map.end()) + // if (it == alias_name_to_node_map.end()) + // return {}; + + auto it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); + if (it == nullptr) return {}; - if (!it->second) + QueryTreeNodePtr & alias_node = *it; + + if (!alias_node) throw Exception(ErrorCodes::LOGICAL_ERROR, "Node with alias {} is not valid. In scope {}", identifier_bind_part, @@ -2889,14 +2961,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier return {}; } - auto node_type = it->second->getNodeType(); + auto node_type = alias_node->getNodeType(); /// Resolve expression if necessary if (node_type == QueryTreeNodeType::IDENTIFIER) { - scope.pushExpressionNode(it->second); + scope.pushExpressionNode(alias_node); - auto & alias_identifier_node = it->second->as(); + auto & alias_identifier_node = alias_node->as(); auto identifier = alias_identifier_node.getIdentifier(); auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); if (!lookup_result.resolved_identifier) @@ -2912,7 +2984,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier getHintsErrorMessageSuffix(hints)); } - it->second = lookup_result.resolved_identifier; + alias_node = lookup_result.resolved_identifier; /** During collection of aliases if node is identifier and has alias, we cannot say if it is * column or function node. Check QueryExpressionsAliasVisitor documentation for clarification. @@ -2922,33 +2994,31 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier * If we resolved identifier node as function, we must remove identifier node alias from * expression alias map. 
*/ - if (identifier_lookup.isExpressionLookup()) - scope.alias_name_to_lambda_node.erase(identifier_bind_part); - else if (identifier_lookup.isFunctionLookup()) - scope.alias_name_to_expression_node->erase(identifier_bind_part); + // if (identifier_lookup.isExpressionLookup()) + // scope.alises.alias_name_to_lambda_node.erase(identifier_bind_part); + // else if (identifier_lookup.isFunctionLookup()) + // scope.aliases.alias_name_to_expression_node->erase(identifier_bind_part); scope.popExpressionNode(); } else if (node_type == QueryTreeNodeType::FUNCTION) { - resolveExpressionNode(it->second, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + resolveExpressionNode(alias_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } else if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION) { if (identifier_resolve_settings.allow_to_resolve_subquery_during_identifier_resolution) - resolveExpressionNode(it->second, scope, false /*allow_lambda_expression*/, identifier_lookup.isTableExpressionLookup() /*allow_table_expression*/); + resolveExpressionNode(alias_node, scope, false /*allow_lambda_expression*/, identifier_lookup.isTableExpressionLookup() /*allow_table_expression*/); } - QueryTreeNodePtr result = it->second; - - if (identifier_lookup.identifier.isCompound() && result) + if (identifier_lookup.identifier.isCompound() && alias_node) { if (identifier_lookup.isExpressionLookup()) { return tryResolveIdentifierFromCompoundExpression( identifier_lookup.identifier, 1 /*identifier_bind_size*/, - it->second, + alias_node, {} /* compound_expression_source */, scope, identifier_resolve_settings.allow_to_check_join_tree /* can_be_not_found */); @@ -2963,7 +3033,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } } - return result; + return alias_node; } /** Resolve identifier from table columns. @@ -4124,10 +4194,12 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook * SELECT id FROM ( SELECT ... ) AS subquery ARRAY JOIN [0] AS id INNER JOIN second_table USING (id) * In the example, identifier `id` should be resolved into one from USING (id) column. */ - auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); - if (alias_it != scope.alias_name_to_expression_node->end() && alias_it->second->getNodeType() == QueryTreeNodeType::COLUMN) + + auto alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); + //auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); + if (alias_it && (*alias_it)->getNodeType() == QueryTreeNodeType::COLUMN) { - const auto & column_node = alias_it->second->as(); + const auto & column_node = (*alias_it)->as(); if (column_node.getColumnSource()->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) prefer_column_name_to_alias = true; } @@ -5232,7 +5304,7 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod scope.scope_node->formatASTForErrorMessage()); /// Initialize aliases in lambda scope - QueryExpressionsAliasVisitor visitor(scope); + QueryExpressionsAliasVisitor visitor(scope.aliases); visitor.visit(lambda_to_resolve.getExpression()); /** Replace lambda arguments with new arguments. @@ -5252,8 +5324,8 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod const auto & lambda_argument_name = lambda_argument_identifier ? 
lambda_argument_identifier->getIdentifier().getFullName() : lambda_argument_column->getColumnName(); - bool has_expression_node = scope.alias_name_to_expression_node->contains(lambda_argument_name); - bool has_alias_node = scope.alias_name_to_lambda_node.contains(lambda_argument_name); + bool has_expression_node = scope.aliases.alias_name_to_expression_node->contains(lambda_argument_name); + bool has_alias_node = scope.aliases.alias_name_to_lambda_node.contains(lambda_argument_name); if (has_expression_node || has_alias_node) { @@ -5929,7 +6001,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi function_names = AggregateFunctionFactory::instance().getAllRegisteredNames(); possible_function_names.insert(possible_function_names.end(), function_names.begin(), function_names.end()); - for (auto & [name, lambda_node] : scope.alias_name_to_lambda_node) + for (auto & [name, lambda_node] : scope.aliases.alias_name_to_lambda_node) { if (lambda_node->getNodeType() == QueryTreeNodeType::LAMBDA) possible_function_names.push_back(name); @@ -6263,7 +6335,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(node_alias); } - bool is_duplicated_alias = scope.nodes_with_duplicated_aliases.contains(node); + bool is_duplicated_alias = scope.aliases.nodes_with_duplicated_aliases.contains(node); if (is_duplicated_alias) scope.non_cached_identifier_lookups_during_expression_resolve.insert({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); @@ -6287,14 +6359,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * * To resolve b we need to resolve a. */ - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != scope.aliases.alias_name_to_expression_node->end()) node = it->second; if (allow_lambda_expression) { - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) node = it->second; } } @@ -6320,15 +6392,15 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(projection_name_it->second); } - if (resolved_identifier_node && !node_alias.empty()) - scope.alias_name_to_lambda_node.erase(node_alias); + // if (resolved_identifier_node && !node_alias.empty()) + // scope.alias_name_to_lambda_node.erase(node_alias); if (!resolved_identifier_node && allow_lambda_expression) { resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::FUNCTION}, scope).resolved_identifier; - if (resolved_identifier_node && !node_alias.empty()) - scope.alias_name_to_expression_node->erase(node_alias); + // if (resolved_identifier_node && !node_alias.empty()) + // scope.alias_name_to_expression_node->erase(node_alias); } if (!resolved_identifier_node && allow_table_expression) @@ -6569,14 +6641,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id */ if (!node_alias.empty() && use_alias_table && !scope.group_by_use_nulls) { - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != 
scope.aliases.alias_name_to_expression_node->end()) it->second = node; if (allow_lambda_expression) { - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) it->second = node; } } @@ -6949,8 +7021,8 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod resolved_identifier = resolved_identifier->clone(); /// Update alias name to table expression map - auto table_expression_it = scope.alias_name_to_table_expression_node.find(from_table_identifier_alias); - if (table_expression_it != scope.alias_name_to_table_expression_node.end()) + auto table_expression_it = scope.aliases.alias_name_to_table_expression_node.find(from_table_identifier_alias); + if (table_expression_it != scope.aliases.alias_name_to_table_expression_node.end()) table_expression_it->second = resolved_identifier; auto table_expression_modifiers = from_table_identifier.getTableExpressionModifiers(); @@ -7149,7 +7221,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table alias_column_resolve_scope.context = scope.context; /// Initialize aliases in alias column scope - QueryExpressionsAliasVisitor visitor(alias_column_resolve_scope); + QueryExpressionsAliasVisitor visitor(alias_column_resolve_scope.aliases); visitor.visit(alias_column_to_resolve->getExpression()); resolveExpressionNode(alias_column_resolve_scope.scope_node, @@ -7519,7 +7591,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (auto & array_join_expression : array_join_nodes) { auto array_join_expression_alias = array_join_expression->getAlias(); - if (!array_join_expression_alias.empty() && scope.alias_name_to_expression_node->contains(array_join_expression_alias)) + if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias)) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", array_join_expression->formatASTForErrorMessage(), @@ -7613,8 +7685,8 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif array_join_nodes = std::move(array_join_column_expressions); for (auto & array_join_column_expression : array_join_nodes) { - auto it = scope.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); + if (it != scope.aliases.alias_name_to_expression_node->end()) { auto & array_join_column_expression_typed = array_join_column_expression->as(); auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), @@ -7911,7 +7983,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, if (alias_name.empty()) return; - auto [it, inserted] = scope.alias_name_to_table_expression_node.emplace(alias_name, table_expression_node); + auto [it, inserted] = scope.aliases.alias_name_to_table_expression_node.emplace(alias_name, table_expression_node); if (!inserted) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "Duplicate aliases {} for table expressions in FROM section are not allowed. Try to register {}. 
Already registered {}.", @@ -7980,7 +8052,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of QUALIFY"); /// Initialize aliases in query node scope - QueryExpressionsAliasVisitor visitor(scope); + QueryExpressionsAliasVisitor visitor(scope.aliases); if (query_node_typed.hasWith()) visitor.visit(query_node_typed.getWithNode()); @@ -8098,7 +8170,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier table_expressions_visitor.visit(query_node_typed.getJoinTree()); initializeQueryJoinTreeNode(query_node_typed.getJoinTree(), scope); - scope.alias_name_to_table_expression_node.clear(); + scope.aliases.alias_name_to_table_expression_node.clear(); resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor); } @@ -8148,10 +8220,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Clone is needed cause aliases share subtrees. /// If not clone, the same (shared) subtree could be resolved again with different (Nullable) type /// See 03023_group_by_use_nulls_analyzer_crashes - for (auto & [key, node] : scope.alias_name_to_expression_node_before_group_by) - scope.alias_name_to_expression_node_after_group_by[key] = node->clone(); + for (auto & [key, node] : scope.aliases.alias_name_to_expression_node_before_group_by) + scope.aliases.alias_name_to_expression_node_after_group_by[key] = node->clone(); - scope.alias_name_to_expression_node = &scope.alias_name_to_expression_node_after_group_by; + scope.aliases.alias_name_to_expression_node = &scope.aliases.alias_name_to_expression_node_after_group_by; } if (query_node_typed.hasHaving()) @@ -8223,7 +8295,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier * After scope nodes are resolved, we can compare node with duplicate alias with * node from scope alias table. 
*/ - for (const auto & node_with_duplicated_alias : scope.cloned_nodes_with_duplicated_aliases) + for (const auto & node_with_duplicated_alias : scope.aliases.cloned_nodes_with_duplicated_aliases) { auto node = node_with_duplicated_alias; auto node_alias = node->getAlias(); @@ -8234,8 +8306,8 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier bool has_node_in_alias_table = false; - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != scope.aliases.alias_name_to_expression_node->end()) { has_node_in_alias_table = true; @@ -8248,8 +8320,8 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier scope.scope_node->formatASTForErrorMessage()); } - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) { has_node_in_alias_table = true; @@ -8294,10 +8366,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Remove aliases from expression and lambda nodes - for (auto & [_, node] : *scope.alias_name_to_expression_node) + for (auto & [_, node] : *scope.aliases.alias_name_to_expression_node) node->removeAlias(); - for (auto & [_, node] : scope.alias_name_to_lambda_node) + for (auto & [_, node] : scope.aliases.alias_name_to_lambda_node) node->removeAlias(); query_node_typed.resolveProjectionColumns(std::move(projection_columns)); diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference index 3733d6b6084..e39cdce92b0 100644 --- a/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference @@ -17,3 +17,4 @@ Alias conflict with identifier inside expression Alias setting prefer_column_name_to_alias 0 Value +/a/b/c diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql index 52a1cd1dae8..467073fc4e8 100644 --- a/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql @@ -48,3 +48,5 @@ WITH id AS value SELECT value FROM test_table; SET prefer_column_name_to_alias = 0; DROP TABLE test_table; + +WITH path('clickhouse.com/a/b/c') AS x SELECT x AS path; diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.sql b/tests/queries/0_stateless/02343_analyzer_lambdas.sql index 0c257cf6f18..25928acb2c3 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.sql +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.sql @@ -93,3 +93,11 @@ SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda2(tuple(x), x + 1), 1 DROP TABLE test_table_tuple; DROP TABLE test_table; + +WITH x -> (lambda(x) + 1) AS lambda +SELECT lambda(1); -- {serverError UNSUPPORTED_METHOD } + +WITH + x -> (lambda1(x) + 1) AS lambda, + lambda AS lambda1 +SELECT lambda(1); -- {serverError UNSUPPORTED_METHOD } From d4430b583c4e4531ad1372fd3e40ff6bad5a414d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 21 May 2024 16:19:14 +0200 Subject: [PATCH 472/651] Create snapshot --- utils/keeper-bench/Runner.cpp | 100 +++++++++++++++++----------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff 
--git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 0050230b6ec..a625a7f157d 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -628,7 +628,11 @@ struct ZooKeeperRequestFromLogReader set_request->path = current_block->getPath(idx_in_block); set_request->data = current_block->getData(idx_in_block); if (auto version = current_block->getVersion(idx_in_block)) - set_request->version = *version; + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + set_request->version = std::numeric_limits::max(); + } request_from_log.request = set_request; break; } @@ -637,7 +641,11 @@ struct ZooKeeperRequestFromLogReader auto remove_request = std::make_shared(); remove_request->path = current_block->getPath(idx_in_block); if (auto version = current_block->getVersion(idx_in_block)) - remove_request->version = *version; + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + remove_request->version = std::numeric_limits::max(); + } request_from_log.request = remove_request; break; } @@ -647,7 +655,11 @@ struct ZooKeeperRequestFromLogReader auto check_request = std::make_shared(); check_request->path = current_block->getPath(idx_in_block); if (auto version = current_block->getVersion(idx_in_block)) - check_request->version = *version; + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + check_request->version = std::numeric_limits::max(); + } if (op_num == Coordination::OpNum::CheckNotExists) check_request->not_exists = true; request_from_log.request = check_request; @@ -791,10 +803,12 @@ struct SetupNodeCollector if (!request_from_log.expected_result.has_value()) return; + auto process_request = [&](const Coordination::ZooKeeperRequest & request, const auto expected_result) { const auto & path = request.getPath(); - if (processed_paths.contains(path)) + + if (nodes_created_during_replay.contains(path)) return; auto op_num = request.getOpNum(); @@ -804,64 +818,43 @@ struct SetupNodeCollector if (expected_result == Coordination::Error::ZNODEEXISTS) { addExpectedNode(path); - processed_paths.insert(path); } else if (expected_result == Coordination::Error::ZOK) { + nodes_created_during_replay.insert(path); /// we need to make sure ancestors exist auto position = path.find_last_of('/'); if (position != 0) { auto parent_path = path.substr(0, position); - if (!processed_paths.contains(parent_path)) - { - addExpectedNode(parent_path); - processed_paths.insert(parent_path); - } + addExpectedNode(parent_path); } - - processed_paths.insert(path); } } else if (op_num == Coordination::OpNum::Remove) { - if (expected_result == Coordination::Error::ZOK) - { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) addExpectedNode(path); - processed_paths.insert(path); - } } else if (op_num == Coordination::OpNum::Set) { - if (expected_result == Coordination::Error::ZOK) - { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) addExpectedNode(path); - processed_paths.insert(path); - } } else if (op_num == Coordination::OpNum::Check) { - if (expected_result == Coordination::Error::ZOK) - { + if 
(expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) addExpectedNode(path); - processed_paths.insert(path); - } } else if (op_num == Coordination::OpNum::CheckNotExists) { - if (expected_result == Coordination::Error::ZNODEEXISTS) - { + if (expected_result == Coordination::Error::ZNODEEXISTS || expected_result == Coordination::Error::ZBADVERSION) addExpectedNode(path); - processed_paths.insert(path); - } } else if (request.isReadRequest()) { if (expected_result == Coordination::Error::ZOK) - { addExpectedNode(path); - processed_paths.insert(path); - } } }; @@ -940,7 +933,7 @@ struct SetupNodeCollector std::mutex nodes_mutex; DB::KeeperContextPtr keeper_context; Coordination::KeeperStoragePtr initial_storage; - std::unordered_set processed_paths; + std::unordered_set nodes_created_during_replay; std::optional snapshot_manager; }; @@ -979,23 +972,23 @@ void requestFromLogExecutor(std::shared_ptrtoString(), response.error, *expected_result) - << std::endl; + //if (*expected_result != response.error) + //{ + // std::cerr << fmt::format( + // "Unexpected result for {}\ngot {}, expected {}\n", request->toString(), response.error, *expected_result) + // << std::endl; - if (const auto * multi_response = dynamic_cast(&response)) - { - std::string subresponses; - for (size_t i = 0; i < multi_response->responses.size(); ++i) - { - subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); - } + // if (const auto * multi_response = dynamic_cast(&response)) + // { + // std::string subresponses; + // for (size_t i = 0; i < multi_response->responses.size(); ++i) + // { + // subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); + // } - std::cerr << "Subresponses\n" << subresponses << std::endl; - } - } + // std::cerr << "Subresponses\n" << subresponses << std::endl; + // } + //} } request_promise->set_value(); @@ -1049,7 +1042,7 @@ void Runner::runBenchmarkFromLog() std::unordered_map>> executor_id_to_queue; - SCOPE_EXIT({ + SCOPE_EXIT_SAFE({ for (const auto & [executor_id, executor_queue] : executor_id_to_queue) executor_queue->finish(); @@ -1262,8 +1255,15 @@ Runner::~Runner() if (pool) pool->wait(); - auto connection = getConnection(connection_infos[0], 0); - benchmark_context.cleanup(*connection); + try + { + auto connection = getConnection(connection_infos[0], 0); + benchmark_context.cleanup(*connection); + } + catch (...) + { + DB::tryLogCurrentException("While trying to clean nodes"); + } } namespace From 23eaa0de40d92d61e453a86dfa7c1a38b5d67b75 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 21 May 2024 14:28:19 +0000 Subject: [PATCH 473/651] Fix style. 
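
For reference, the pattern being normalized looks roughly like the sketch below. This is a simplified, self-contained illustration rather than the actual ClickHouse sources: UNREACHABLE() is stubbed with std::abort() here so the sample compiles on its own, and the enum is hypothetical.

    #include <cstdlib>

    // Simplified stand-in for the project-wide macro; the real one aborts in
    // debug builds and marks the path unreachable for the optimizer.
    #define UNREACHABLE() std::abort()

    enum class FindOption { FIRST_NAME, FULL_NAME };

    static const char * toString(FindOption option)
    {
        switch (option)
        {
            case FindOption::FIRST_NAME: return "FIRST_NAME";
            case FindOption::FULL_NAME: return "FULL_NAME";
        }
        /// The switch above covers every enumerator, but some compilers still
        /// warn that control can reach the end of a non-void function, so the
        /// macro is used instead of the non-portable __builtin_unreachable().
        UNREACHABLE();
    }

    int main()
    {
        return toString(FindOption::FULL_NAME) == nullptr;
    }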
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index e50ad7911a0..7ecb91e7972 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -615,7 +615,7 @@ struct ScopeAliases case IdentifierLookupContext::TABLE_EXPRESSION: return alias_name_to_table_expression_node; } - __builtin_unreachable(); + UNREACHABLE(); } enum class FindOption @@ -632,7 +632,7 @@ struct ScopeAliases case FindOption::FULL_NAME: return identifier.getFullName(); } - __builtin_unreachable(); + UNREACHABLE(); } QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) From dd9bb8fe9cc2d3187906cd65e0757ae29c67f032 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 16:54:28 +0200 Subject: [PATCH 474/651] Add tests --- .../02931_max_num_to_warn.reference | 2 + .../0_stateless/02931_max_num_to_warn.sql | 43 ++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.reference b/tests/queries/0_stateless/02931_max_num_to_warn.reference index 7de998eebfa..419149b0bd2 100644 --- a/tests/queries/0_stateless/02931_max_num_to_warn.reference +++ b/tests/queries/0_stateless/02931_max_num_to_warn.reference @@ -1,3 +1,5 @@ The number of attached tables is more than 5 +The number of attached views is more than 5 +The number of attached dictionaries is more than 5 The number of attached databases is more than 2 The number of active parts is more than 10 diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql index 23f04816d5a..4087a536cd0 100644 --- a/tests/queries/0_stateless/02931_max_num_to_warn.sql +++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql @@ -13,6 +13,41 @@ CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_9 (id CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_10 (id Int32, str String) Engine=Memory; CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_11 (id Int32, str String) Engine=Memory; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_1 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_1; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_2 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_2; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_3 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_3; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_4 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_4; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_5 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_5; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_6 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_6; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_7 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_7; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_8 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_8; +CREATE VIEW IF NOT EXISTS 
test_max_num_to_warn_02931.test_max_num_to_warn_view_9 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_9; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_10 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_10; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_11 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_11; + +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_1 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_1'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_2 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_2'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_3 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_3'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_4 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_4'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_5 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_5'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_6 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_6'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_7 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_7'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_8 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_8'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_9 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_9'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_10 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_10'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_11 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_11'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); + CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_1; CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_2; CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_3; @@ -37,7 +72,13 @@ INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_9 VALUES (1, 'Hello' INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_10 VALUES (1, 'Hello'); INSERT INTO 
test_max_num_to_warn_02931.test_max_num_to_warn_11 VALUES (1, 'Hello'); -SELECT * FROM system.warnings where message in ('The number of attached tables is more than 5', 'The number of attached databases is more than 2', 'The number of active parts is more than 10'); +SELECT * FROM system.warnings where message in ( + 'The number of attached tables is more than 5', + 'The number of attached views is more than 5', + 'The number of attached dictionaries is more than 5', + 'The number of attached databases is more than 2', + 'The number of active parts is more than 10' +); DROP DATABASE IF EXISTS test_max_num_to_warn_02931; DROP DATABASE IF EXISTS test_max_num_to_warn_1; From f1f8a35bab0e9dc46aa46faa4c3be7609b77a509 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 15:03:16 +0000 Subject: [PATCH 475/651] Fix #64136 --- src/Interpreters/Cache/QueryCache.cpp | 26 ++++++++++++---- src/Interpreters/Cache/QueryCache.h | 3 +- src/Interpreters/executeQuery.cpp | 4 +-- .../02494_query_cache_use_database.reference | 2 ++ .../02494_query_cache_use_database.sql | 30 +++++++++++++++++++ 5 files changed, 56 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02494_query_cache_use_database.reference create mode 100644 tests/queries/0_stateless/02494_query_cache_use_database.sql diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index fafe50c170f..2fddbc0b044 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -177,6 +177,22 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast) return transformed_ast; } +IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database) +{ + ast = removeQueryCacheSettings(ast); + + /// Hash the AST, it must consider aliases (issue #56258) + constexpr bool ignore_aliases = false; + IAST::Hash ast_hash = ast->getTreeHash(ignore_aliases); + + /// Also hash the database specified via SQL `USE db`, otherwise identifiers in same query (AST) may mean different columns in different tables (issue #64136) + IAST::Hash cur_database_hash = CityHash_v1_0_2::CityHash128(current_database.data(), current_database.size()); + UInt64 low_combined = ast_hash.low64 ^ cur_database_hash.low64; + UInt64 high_combined = ast_hash.high64 ^ cur_database_hash.high64; + + return {low_combined, high_combined}; +} + String queryStringFromAST(ASTPtr ast) { WriteBufferFromOwnString buf; @@ -186,17 +202,15 @@ String queryStringFromAST(ASTPtr ast) } -/// Hashing of ASTs must consider aliases (issue #56258) -static constexpr bool ignore_aliases = false; - QueryCache::Key::Key( ASTPtr ast_, + String current_database, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, bool is_compressed_) - : ast_hash(removeQueryCacheSettings(ast_)->getTreeHash(ignore_aliases)) + : ast_hash(calculateAstHash(ast_, current_database)) , header(header_) , user_id(user_id_) , current_user_roles(current_user_roles_) @@ -207,8 +221,8 @@ QueryCache::Key::Key( { } -QueryCache::Key::Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_) - : QueryCache::Key(ast_, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name +QueryCache::Key::Key(ASTPtr ast_, String current_database, std::optional user_id_, const std::vector & current_user_roles_) + : QueryCache::Key(ast_, current_database, {}, user_id_, current_user_roles_, 
false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 814cad37f82..c234ea3d464 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -88,6 +88,7 @@ public: /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, + String current_database, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, @@ -95,7 +96,7 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). - Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, String current_database, std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index f1f72a4ea4a..90e6406c792 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1102,7 +1102,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getUserID(), context->getCurrentRoles()); + QueryCache::Key key(ast, context->getCurrentDatabase(), context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1225,7 +1225,7 @@ static std::tuple executeQueryImpl( && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( - ast, res.pipeline.getHeader(), + ast, context->getCurrentDatabase(), res.pipeline.getHeader(), context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), diff --git a/tests/queries/0_stateless/02494_query_cache_use_database.reference b/tests/queries/0_stateless/02494_query_cache_use_database.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_use_database.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/02494_query_cache_use_database.sql b/tests/queries/0_stateless/02494_query_cache_use_database.sql new file mode 100644 index 00000000000..df560f82ebb --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_use_database.sql @@ -0,0 +1,30 @@ +-- Tags: no-parallel, no-fasttest +-- Tag no-fasttest: Depends on OpenSSL +-- Tag no-parallel: Messes with internal cache + +-- Test for issue #64136 + +SYSTEM DROP QUERY CACHE; + +DROP DATABASE IF EXISTS db1; +DROP DATABASE IF EXISTS db2; + +CREATE DATABASE db1; +CREATE DATABASE db2; + +CREATE TABLE db1.tab(a UInt64, PRIMARY KEY a); +CREATE TABLE db2.tab(a UInt64, PRIMARY KEY a); + +INSERT INTO db1.tab values(1); +INSERT INTO db2.tab values(2); + +USE db1; +SELECT * FROM tab SETTINGS use_query_cache=1; + +USE db2; +SELECT * FROM tab SETTINGS use_query_cache=1; + +DROP DATABASE db1; +DROP DATABASE db2; + +SYSTEM DROP QUERY CACHE; From 3dbf32a558458b50bafb017d45b83446ef0ec2e8 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 17:03:43 +0200 Subject: [PATCH 476/651] Remove dict creation --- 
tests/queries/0_stateless/02931_max_num_to_warn.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql index 4087a536cd0..1c96e017646 100644 --- a/tests/queries/0_stateless/02931_max_num_to_warn.sql +++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql @@ -45,8 +45,6 @@ CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_ SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_9'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_10 (id Int32, str String) PRIMARY KEY id SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_10'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); -CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_11 (id Int32, str String) PRIMARY KEY id -SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_11'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_1; CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_2; From ac7da1cc388edf03fd189bd24376c4a571c7b12a Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 21 May 2024 12:57:05 +0200 Subject: [PATCH 477/651] CI: cancel running PR wf after adding to MQ --- .github/workflows/merge_queue.yml | 3 + tests/ci/ci.py | 45 ++++++++++-- tests/ci/ci_metadata.py | 112 ++++++++++++++++++++++++++++++ tests/ci/github_helper.py | 19 +++++ 4 files changed, 172 insertions(+), 7 deletions(-) create mode 100644 tests/ci/ci_metadata.py diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 1b6cc320ec4..97aa0db4cdb 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -22,6 +22,9 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 + - name: Cancel PR workflow + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 3a616c8aad6..046550c62f8 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -44,6 +44,7 @@ from env_helper import ( REPORT_PATH, S3_BUILDS_BUCKET, TEMP_PATH, + GITHUB_RUN_ID, ) from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, Git @@ -52,6 +53,7 @@ from github_helper import GitHub from pr_info import PRInfo from report import ERROR, SUCCESS, BuildResult, JobReport from s3_helper import S3Helper +from ci_metadata import CiMetadata from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -66,12 +68,12 @@ class PendingState: class CiCache: """ CI cache is a bunch of records. Record is a file stored under special location on s3. 
- The file name has following format + The file name has a format: _[]--___.ci RECORD_TYPE: - SUCCESSFUL - for successfuly finished jobs + SUCCESSFUL - for successful jobs PENDING - for pending jobs ATTRIBUTES: @@ -991,7 +993,11 @@ def normalize_check_name(check_name: str) -> str: def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: - # FIXME: consider switching to sub_parser for configure, pre, run, post actions + parser.add_argument( + "--cancel-previous-run", + action="store_true", + help="Action that cancels previous running PR workflow if PR added into the Merge Queue", + ) parser.add_argument( "--configure", action="store_true", @@ -1000,17 +1006,19 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: parser.add_argument( "--update-gh-statuses", action="store_true", - help="Action that recreate success GH statuses for jobs that finished successfully in past and will be skipped this time", + help="Action that recreate success GH statuses for jobs that finished successfully in past and will be " + "skipped this time", ) parser.add_argument( "--pre", action="store_true", - help="Action that executes prerequesetes for the job provided in --job-name", + help="Action that executes prerequisites for the job provided in --job-name", ) parser.add_argument( "--run", action="store_true", - help="Action that executes run action for specified --job-name. run_command must be configured for a given job name.", + help="Action that executes run action for specified --job-name. run_command must be configured for a given " + "job name.", ) parser.add_argument( "--post", @@ -1088,7 +1096,8 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: "--rebuild-all-binaries", action="store_true", default=False, - help="[DEPRECATED. to be removed, once no wf use it] will create run config without skipping build jobs in any case, used in --configure action (for release branches)", + help="[DEPRECATED. 
to be removed, once no wf use it] will create run config without skipping build jobs in " + "any case, used in --configure action (for release branches)", ) parser.add_argument( "--commit-message", @@ -1902,6 +1911,15 @@ def _get_ext_check_name(check_name: str) -> str: return check_name_with_group +def _cancel_pr_wf(s3: S3Helper, pr_number: int) -> None: + run_id = CiMetadata(s3, pr_number).run_id + if not run_id: + print("ERROR: FIX IT: Run id has not been found!") + else: + print(f"Canceling PR workflow run_id: [{run_id}], pr: [{pr_number}]") + GitHub.cancel_wf(run_id) + + def main() -> int: logging.basicConfig(level=logging.INFO) exit_code = 0 @@ -1930,6 +1948,12 @@ def main() -> int: ### CONFIGURE action: start if args.configure: + if CI and pr_info.is_pr: + # store meta on s3 (now we need it only for PRs) + meta = CiMetadata(s3, pr_info.number) + meta.run_id = int(GITHUB_RUN_ID) + meta.push_meta() + ci_options = CiOptions.create_from_pr_message( args.commit_message or None, update_from_api=True ) @@ -2222,6 +2246,13 @@ def main() -> int: assert indata, "Run config must be provided via --infile" _update_gh_statuses_action(indata=indata, s3=s3) + ### CANCEL PREVIOUS WORKFLOW RUN + elif args.cancel_previous_run: + assert ( + pr_info.is_merge_queue + ), "Currently it's supposed to be used in MQ wf to cancel running PR wf if any" + _cancel_pr_wf(s3, pr_info.merged_pr) + ### print results _print_results(result, args.outfile, args.pretty) diff --git a/tests/ci/ci_metadata.py b/tests/ci/ci_metadata.py new file mode 100644 index 00000000000..5856e9a8501 --- /dev/null +++ b/tests/ci/ci_metadata.py @@ -0,0 +1,112 @@ +from pathlib import Path +from typing import Optional + +from env_helper import ( + S3_BUILDS_BUCKET, + TEMP_PATH, +) +from s3_helper import S3Helper + + +# pylint: disable=too-many-lines + + +class CiMetadata: + """ + CI Metadata class owns data like workflow run_id for a given pr, etc. 
+ Goal is to have everything we need to manage workflows on S3 and rely on GH api as little as possible + """ + + _S3_PREFIX = "CI_meta_v1" + _LOCAL_PATH = Path(TEMP_PATH) / "ci_meta" + _FILE_SUFFIX = ".cimd" + _FILENAME_RUN_ID = "run_id" + _FILE_SUFFIX + + def __init__( + self, + s3: S3Helper, + pr_number: Optional[int] = None, + sha: Optional[str] = None, + git_ref: Optional[str] = None, + ): + assert pr_number or (sha and git_ref) + + self.sha = sha + self.pr_number = pr_number + self.git_ref = git_ref + self.s3 = s3 + self.run_id = 0 + + if self.pr_number: + self.s3_path = f"{self._S3_PREFIX}/PRs/{self.pr_number}/" + else: + self.s3_path = f"{self._S3_PREFIX}/{self.git_ref}/{self.sha}/" + + self._updated = False + + if not self._LOCAL_PATH.exists(): + self._LOCAL_PATH.mkdir(parents=True, exist_ok=True) + + def fetch_meta(self): + """ + Fetches meta from s3 + """ + + # clean up + for file in self._LOCAL_PATH.glob("*" + self._FILE_SUFFIX): + file.unlink() + + _ = self.s3.download_files( + bucket=S3_BUILDS_BUCKET, + s3_path=self.s3_path, + file_suffix=self._FILE_SUFFIX, + local_directory=self._LOCAL_PATH, + ) + + meta_files = Path(self._LOCAL_PATH).rglob("*" + self._FILE_SUFFIX) + for file_name in meta_files: + path_in_str = str(file_name) + with open(path_in_str, "r", encoding="utf-8") as f: + # Read all lines in the file + lines = f.readlines() + assert len(lines) == 1 + if file_name.name == self._FILENAME_RUN_ID: + self.run_id = int(lines[0]) + + self._updated = True + return self + + def push_meta( + self, + ) -> None: + """ + Uploads meta on s3 + """ + assert self.run_id + print("Storing workflow meta on s3") + + local_file = self._LOCAL_PATH / self._FILENAME_RUN_ID + with open(local_file, "w", encoding="utf-8") as file: + file.write(f"{self.run_id}\n") + + _ = self.s3.upload_file( + bucket=S3_BUILDS_BUCKET, + file_path=local_file, + s3_path=self.s3_path + local_file.name, + ) + + +if __name__ == "__main__": + # TEST: + s3 = S3Helper() + a = CiMetadata(s3, 12345, "deadbeaf", "test_branch") + a.run_id = 111 + a.push_meta() + b = CiMetadata(s3, 12345, "deadbeaf", "test_branch") + assert b.fetch_meta().run_id == a.run_id + + a = CiMetadata(s3, 0, "deadbeaf", "test_branch") + a.run_id = 112 + a.push_meta() + b = CiMetadata(s3, 0, "deadbeaf", "test_branch") + assert b.fetch_meta().run_id == a.run_id diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index ae1eaf4c06a..81603c66bae 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -9,6 +9,7 @@ from time import sleep from typing import List, Optional, Tuple, Union import github +import requests # explicit reimport # pylint: disable=useless-import-alias @@ -21,6 +22,9 @@ from github.NamedUser import NamedUser as NamedUser from github.PullRequest import PullRequest as PullRequest from github.Repository import Repository as Repository +from env_helper import GITHUB_REPOSITORY +from get_robot_token import get_best_robot_token + # pylint: enable=useless-import-alias CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache") @@ -260,3 +264,18 @@ class GitHub(github.Github): def retries(self, value: int) -> None: assert isinstance(value, int) self._retries = value + + # minimalistic static methods not using pygithub + @staticmethod + def cancel_wf(run_id, strict=False): + token = get_best_robot_token() + headers = {"Authorization": f"token {token}"} + url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/actions/runs/{run_id}/cancel" + try: + response = requests.post(url, headers=headers, timeout=10) 
+ response.raise_for_status() + print(f"NOTE: Workflow [{run_id}] has been cancelled") + except Exception as ex: + print("ERROR: Got exception executing wf cancel request", ex) + if strict: + raise ex From f815b4e037bb1ecd938ad659660f4d05326d0b7d Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 17:15:55 +0200 Subject: [PATCH 478/651] Fix style --- src/Databases/DatabaseLazy.cpp | 14 ++++++++++---- src/Databases/DatabasesCommon.cpp | 14 ++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index a27e69c7e63..c95d690f331 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -174,12 +174,18 @@ bool DatabaseLazy::empty() const return tables_cache.empty(); } -static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) { - if (storage->isView()) { +static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) +{ + if (storage->isView()) + { return CurrentMetrics::AttachedView; - } else if (storage->isDictionary()) { + } + else if (storage->isDictionary()) + { return CurrentMetrics::AttachedDictionary; - } else { + } + else + { return CurrentMetrics::AttachedTable; } } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 03a8feb845f..ff721e8e5c4 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -255,12 +255,18 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, con } -static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) { - if (storage->isView()) { +static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) +{ + if (storage->isView()) + { return CurrentMetrics::AttachedView; - } else if (storage->isDictionary()) { + } + else if (storage->isDictionary()) + { return CurrentMetrics::AttachedDictionary; - } else { + } + else + { return CurrentMetrics::AttachedTable; } } From 0106f558fb9040c97fcb7691dc5d72a144ad637b Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 17:19:52 +0200 Subject: [PATCH 479/651] Update limits --- .../en/operations/server-configuration-parameters/settings.md | 4 ++-- src/Core/ServerSettings.h | 4 ++-- src/Interpreters/Context.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 4d239309886..a5fe74fd0c6 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -563,7 +563,7 @@ Default value: 5000 ## max\_view\_num\_to\_warn {#max-view-num-to-warn} If the number of attached views exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. -Default value: 5000 +Default value: 10000 **Example** @@ -573,7 +573,7 @@ Default value: 5000 ## max\_dictionary\_num\_to\_warn {#max-dictionary-num-to-warn} If the number of attached dictionaries exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. 
-Default value: 5000
+Default value: 1000
 
 **Example**
 
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index af96ca3a557..ea0b155b22d 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -97,8 +97,8 @@ namespace DB
     M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes), then the table cannot be dropped with any DROP query.", 0) \
     M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
     M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will be displayed to user.", 0) \
-    M(UInt64, max_view_num_to_warn, 5000lu, "If number of views is greater than this value, server will create a warning that will be displayed to user.", 0) \
-    M(UInt64, max_dictionary_num_to_warn, 5000lu, "If number of dictionaries is greater than this value, server will create a warning that will be displayed to user.", 0) \
+    M(UInt64, max_view_num_to_warn, 10000lu, "If number of views is greater than this value, server will create a warning that will be displayed to user.", 0) \
+    M(UInt64, max_dictionary_num_to_warn, 1000lu, "If number of dictionaries is greater than this value, server will create a warning that will be displayed to user.", 0) \
     M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will be displayed to user.", 0) \
     M(UInt64, max_part_num_to_warn, 100000lu, "If number of parts is greater than this value, server will create a warning that will be displayed to user.", 0) \
     M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent threads can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 4c5df8ef4ea..e1d82a8f604 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -361,8 +361,8 @@ struct ContextSharedPart : boost::noncopyable
     /// No lock required for format_schema_path modified only during initialization
     std::atomic_size_t max_database_num_to_warn = 1000lu;
     std::atomic_size_t max_table_num_to_warn = 5000lu;
-    std::atomic_size_t max_view_num_to_warn = 5000lu;
-    std::atomic_size_t max_dictionary_num_to_warn = 5000lu;
+    std::atomic_size_t max_view_num_to_warn = 10000lu;
+    std::atomic_size_t max_dictionary_num_to_warn = 1000lu;
     std::atomic_size_t max_part_num_to_warn = 100000lu;
     String format_schema_path; /// Path to a directory that contains schema files used by input formats.
     String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types.
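
For readers following the warning-threshold patches above, a minimal self-contained sketch of the intended behaviour is shown below. The names mirror the settings and the messages checked by the tests, but the struct and function here are hypothetical illustrations, not the actual ClickHouse implementation.

    #include <atomic>
    #include <iostream>
    #include <string>
    #include <vector>

    /// Stand-in for the shared context state: each threshold is an atomic so
    /// it can be reloaded from the server configuration without extra locking.
    struct WarningLimits
    {
        std::atomic_size_t max_view_num_to_warn{10000};
        std::atomic_size_t max_dictionary_num_to_warn{1000};
    };

    /// Produces messages of the same shape the tests expect to find in the
    /// system.warnings table once a threshold is exceeded.
    std::vector<std::string> collectWarnings(
        const WarningLimits & limits, size_t attached_views, size_t attached_dictionaries)
    {
        std::vector<std::string> warnings;
        if (attached_views > limits.max_view_num_to_warn.load())
            warnings.push_back("The number of attached views is more than "
                + std::to_string(limits.max_view_num_to_warn.load()));
        if (attached_dictionaries > limits.max_dictionary_num_to_warn.load())
            warnings.push_back("The number of attached dictionaries is more than "
                + std::to_string(limits.max_dictionary_num_to_warn.load()));
        return warnings;
    }

    int main()
    {
        WarningLimits limits;
        for (const auto & message : collectWarnings(limits, 20000, 5))
            std::cout << message << '\n';
    }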
From 828885c66c8a06d24c34b0d92c6cddda3525b30f Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 17:20:52 +0200 Subject: [PATCH 480/651] Fix applyNewSettings --- .../AzureBlobStorage/AzureObjectStorage.cpp | 4 +++- .../ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 3 ++- src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp | 5 +++-- src/Disks/ObjectStorages/Cached/CachedObjectStorage.h | 3 ++- src/Disks/ObjectStorages/DiskObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 9 +++++++-- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 5 ----- src/Disks/ObjectStorages/Local/LocalObjectStorage.h | 5 ----- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 5 +++-- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 3 ++- src/Disks/ObjectStorages/Web/WebObjectStorage.cpp | 5 ----- src/Disks/ObjectStorages/Web/WebObjectStorage.h | 5 ----- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 5 ++--- 13 files changed, 25 insertions(+), 34 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index c09cb5e24e1..e7ecf7cd515 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -398,7 +398,9 @@ void AzureObjectStorage::copyObject( /// NOLINT dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options); } -void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +void AzureObjectStorage::applyNewSettings( + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ContextPtr context, const ApplyNewSettingsOptions &) { auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); settings.set(std::move(new_settings)); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index c38b5906f4e..e09f5e6753d 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -143,7 +143,8 @@ public: void applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) override; + ContextPtr context, + const ApplyNewSettingsOptions & options) override; String getObjectsNamespace() const override { return object_namespace ; } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index c834ef56644..f2f33684fde 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -192,9 +192,10 @@ void CachedObjectStorage::shutdown() } void CachedObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ContextPtr context, const ApplyNewSettingsOptions & options) { - object_storage->applyNewSettings(config, config_prefix, context); + object_storage->applyNewSettings(config, config_prefix, context, options); } String CachedObjectStorage::getObjectsNamespace() const diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 
ed78eb90ef4..a4d263e92eb 100644
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
@@ -91,7 +91,8 @@ public:
     void applyNewSettings(
         const Poco::Util::AbstractConfiguration & config,
         const std::string & config_prefix,
-        ContextPtr context) override;
+        ContextPtr context,
+        const ApplyNewSettingsOptions & options) override;
 
     String getObjectsNamespace() const override;
 
diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
index f6980d1e8f1..27e0cc78a38 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
@@ -536,7 +536,7 @@ void DiskObjectStorage::applyNewSettings(
 {
     /// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name
     const auto config_prefix = "storage_configuration.disks." + name;
-    object_storage->applyNewSettings(config, config_prefix, context_);
+    object_storage->applyNewSettings(config, config_prefix, context_, IObjectStorage::ApplyNewSettingsOptions{ .allow_client_change = true });
 
     {
         std::unique_lock lock(resource_mutex);
diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h
index 5724ae8929c..d4ac6ea0239 100644
--- a/src/Disks/ObjectStorages/IObjectStorage.h
+++ b/src/Disks/ObjectStorages/IObjectStorage.h
@@ -199,10 +199,15 @@ public:
     virtual void startup() = 0;
 
     /// Apply new settings, in most cases reinitialize client and some other stuff
+    struct ApplyNewSettingsOptions
+    {
+        bool allow_client_change = true;
+    };
     virtual void applyNewSettings(
-        const Poco::Util::AbstractConfiguration &,
+        const Poco::Util::AbstractConfiguration & /* config */,
         const std::string & /*config_prefix*/,
-        ContextPtr) {}
+        ContextPtr /* context */,
+        const ApplyNewSettingsOptions & /* options */) {}
 
     /// Sometimes object storages have something similar to chroot or namespace, for example
     /// buckets in S3. If object storage doesn't have any namespaces return empty string.
diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index fa27e08f404..a247d86ddce 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -222,11 +222,6 @@ std::unique_ptr LocalObjectStorage::cloneObjectStorage( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "cloneObjectStorage() is not implemented for LocalObjectStorage"); } -void LocalObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* config_prefix */, ContextPtr /* context */) -{ -} - ObjectStorageKey LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { constexpr size_t key_name_total_size = 32; diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index 4c667818c88..371cd37f8b2 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -73,11 +73,6 @@ public: void startup() override; - void applyNewSettings( - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - ContextPtr context) override; - String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage( diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 7891be64b06..d18468411ea 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -572,7 +572,8 @@ void S3ObjectStorage::startup() void S3ObjectStorage::applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) + ContextPtr context, + const ApplyNewSettingsOptions & options) { auto new_s3_settings = getSettings(config, config_prefix, context); if (!static_headers.empty()) @@ -586,7 +587,7 @@ void S3ObjectStorage::applyNewSettings( new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); auto current_s3_settings = s3_settings.get(); - if (current_s3_settings->auth_settings.hasUpdates(new_s3_settings->auth_settings) || for_disk_s3) + if (options.allow_client_change && (current_s3_settings->auth_settings.hasUpdates(new_s3_settings->auth_settings) || for_disk_s3)) { auto new_client = getClient(config, config_prefix, context, *new_s3_settings, for_disk_s3, &uri); client.set(std::move(new_client)); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 74bc5bef3c7..1fff6d67e23 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -149,7 +149,8 @@ public: void applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) override; + ContextPtr context, + const ApplyNewSettingsOptions & options) override; std::string getObjectsNamespace() const override { return uri.bucket; } diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 69f6137cd2d..e837e056acc 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -344,11 +344,6 @@ void WebObjectStorage::startup() { } -void WebObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* 
config_prefix */, ContextPtr /* context */) -{ -} - ObjectMetadata WebObjectStorage::getObjectMetadata(const std::string & /* path */) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for {}", getName()); diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index b8ab510a6fb..9d3b9a3a8f0 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -72,11 +72,6 @@ public: void startup() override; - void applyNewSettings( - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - ContextPtr context) override; - String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage( diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index c45752c10f5..ba91f3038b6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -87,9 +87,8 @@ bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) c void StorageObjectStorage::updateConfiguration(ContextPtr context) { - /// FIXME: we should be able to update everything apart from client if static_configuration == true. - if (!configuration->isStaticConfiguration()) - object_storage->applyNewSettings(context->getConfigRef(), configuration->getTypeName() + ".", context); + IObjectStorage::ApplyNewSettingsOptions options{ .allow_client_change = !configuration->isStaticConfiguration() }; + object_storage->applyNewSettings(context->getConfigRef(), configuration->getTypeName() + ".", context, options); } namespace From d2cdb37df8bb5c23deb3067b576308c39fab7c97 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 17:59:11 +0200 Subject: [PATCH 481/651] Fix test --- .../integration/test_backup_restore_azure_blob_storage/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 07ef305a0be..92389dd1f24 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -330,7 +330,7 @@ def test_backup_restore_correct_block_ids(cluster): SET azure_min_upload_part_size = {min_upload_size}; SET azure_max_upload_part_size = {max_upload_size}; SET azure_max_blocks_in_multipart_upload = {max_blocks}; - BACKUP TABLE test_simple_merge_tree TO {backup_destination}; + BACKUP TABLE test_simple_merge_tree TO {backup_destination} SETTINGS allow_azure_native_copy = 0; """, ) From a38bb095d800686c27cdf45275af7dc7a5dde149 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 18:12:22 +0200 Subject: [PATCH 482/651] Disallow write and truncate if archive --- .../ObjectStorage/StorageObjectStorage.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index ba91f3038b6..b38636e9144 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -242,6 +242,13 @@ SinkToStoragePtr StorageObjectStorage::write( const auto sample_block = metadata_snapshot->getSampleBlock(); const auto & settings = configuration->getQuerySettings(local_context); + if (configuration->isArchive()) + { + throw 
Exception(ErrorCodes::NOT_IMPLEMENTED, + "Path '{}' contains archive. Write into archive is not supported", + configuration->getPath()); + } + if (configuration->withGlobsIgnorePartitionWildcard()) { throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, @@ -289,6 +296,13 @@ void StorageObjectStorage::truncate( ContextPtr /* context */, TableExclusiveLockHolder & /* table_holder */) { + if (configuration->isArchive()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Path '{}' contains archive. Table cannot be truncated", + configuration->getPath()); + } + if (configuration->withGlobs()) { throw Exception( From 2bf5f0e0fdb6e4ccffad95964622b5da9107ba5b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 21 May 2024 16:13:29 +0000 Subject: [PATCH 483/651] Fix style. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 7ecb91e7972..52cd6207dde 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -614,8 +614,6 @@ struct ScopeAliases case IdentifierLookupContext::FUNCTION: return alias_name_to_lambda_node; case IdentifierLookupContext::TABLE_EXPRESSION: return alias_name_to_table_expression_node; } - - UNREACHABLE(); } enum class FindOption @@ -631,8 +629,6 @@ struct ScopeAliases case FindOption::FIRST_NAME: return identifier.front(); case FindOption::FULL_NAME: return identifier.getFullName(); } - - UNREACHABLE(); } QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) From 3c4fb4f3b632ed4480e730536cb3fe976ca831d0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 16:22:13 +0000 Subject: [PATCH 484/651] Incorporate review feedback --- src/Interpreters/Cache/QueryCache.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 2fddbc0b044..e30da7f233d 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -182,15 +182,14 @@ IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database) ast = removeQueryCacheSettings(ast); /// Hash the AST, it must consider aliases (issue #56258) - constexpr bool ignore_aliases = false; - IAST::Hash ast_hash = ast->getTreeHash(ignore_aliases); + SipHash hash; + ast->updateTreeHash(hash, /*ignore_aliases=*/ false); - /// Also hash the database specified via SQL `USE db`, otherwise identifiers in same query (AST) may mean different columns in different tables (issue #64136) - IAST::Hash cur_database_hash = CityHash_v1_0_2::CityHash128(current_database.data(), current_database.size()); - UInt64 low_combined = ast_hash.low64 ^ cur_database_hash.low64; - UInt64 high_combined = ast_hash.high64 ^ cur_database_hash.high64; + /// Also hash the database specified via SQL `USE db`, otherwise identifiers in same query (AST) may mean different columns in different + /// tables (issue #64136) + hash.update(current_database); - return {low_combined, high_combined}; + return getSipHash128AsPair(hash); } String queryStringFromAST(ASTPtr ast) From 532fe901293968b8dc4fa49299ff09079a9b3cd2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 18:32:19 +0200 Subject: [PATCH 485/651] Remove redundant includes --- src/Storages/ObjectStorage/StorageObjectStorageCluster.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h 
b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index b38eb722df5..1c244b1ca36 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -1,10 +1,7 @@ #pragma once - -// #include #include #include #include -// #include namespace DB { From 96715f611bd54127f43f29123b9a06757d3d7daa Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 18:43:53 +0200 Subject: [PATCH 486/651] Apply change from PR #63642 (https://github.com/ClickHouse/ClickHouse/pull/63642) --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index b38636e9144..dba4aedf7b7 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -131,7 +131,7 @@ public: void applyFilters(ActionDAGNodes added_filter_nodes) override { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); From 2a29046d03fb89aca64432c105b84211260d71f3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 May 2024 19:18:39 +0200 Subject: [PATCH 487/651] Revert "Exclude FunctionsConversion from the large objects check for now" This reverts commit 7261f924bb671ceb9d2131175d558df7296ff217. --- utils/check-style/check-large-objects.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index e2266e89556..2122cca911e 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -7,8 +7,6 @@ export LC_ALL=C # The "total" should be printed without localization TU_EXCLUDES=( AggregateFunctionUniq Aggregator - # FIXME: Exclude for now - FunctionsConversion ) if find $1 -name '*.o' | xargs wc -c | grep --regexp='\.o$' | sort -rn | awk '{ if ($1 > 50000000) print }' \ From c1920130bb308e2d329117113ddf6ada3da2b908 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 19:28:49 +0200 Subject: [PATCH 488/651] Apply changes from PR #62120 --- .../ObjectStorageIteratorAsync.cpp | 1 - .../ObjectStorage/StorageObjectStorage.cpp | 18 +++++++++-- .../StorageObjectStorageSource.cpp | 31 ++++++++++++++++--- .../StorageObjectStorageSource.h | 7 ++++- src/Storages/S3Queue/StorageS3Queue.cpp | 1 + 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 3fb615b2a5c..0420de0f8dd 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -93,7 +93,6 @@ std::future IObjectStorageIterator }, Priority{}); } - bool IObjectStorageIteratorAsync::isValid() { if (!is_initialized) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index dba4aedf7b7..5de7f41b4f7 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -141,14 +141,28 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override { createIterator(nullptr); + Pipes pipes; 
auto context = getContext(); + const size_t max_threads = context->getSettingsRef().max_threads; + size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); + + if (estimated_keys_count > 1) + num_streams = std::min(num_streams, estimated_keys_count); + else + { + /// The amount of keys (zero) was probably underestimated. + /// We will keep one stream for this particular case. + num_streams = 1; + } + + const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); for (size_t i = 0; i < num_streams; ++i) { auto source = std::make_shared( getName(), object_storage, configuration, info, format_settings, - context, max_block_size, iterator_wrapper, need_only_count); + context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); @@ -175,7 +189,7 @@ private: const String name; const bool need_only_count; const size_t max_block_size; - const size_t num_streams; + size_t num_streams; const bool distributed_processing; void createIterator(const ActionsDAG::Node * predicate) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index d3b67876224..8d946f515a3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -48,6 +48,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, + size_t max_parsing_threads_, bool need_only_count_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) @@ -57,6 +58,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( , format_settings(format_settings_) , max_block_size(max_block_size_) , need_only_count(need_only_count_) + , max_parsing_threads(max_parsing_threads_) , read_from_format_info(info) , create_reader_pool(std::make_shared( CurrentMetrics::StorageObjectStorageThreads, @@ -277,8 +279,6 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade else { CompressionMethod compression_method; - const auto max_parsing_threads = need_only_count ? std::optional(1) : std::nullopt; - if (auto object_info_in_archive = dynamic_cast(object_info.get())) { compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); @@ -292,9 +292,17 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade } auto input_format = FormatFactory::instance().getInput( - configuration->format, *read_buf, read_from_format_info.format_header, - getContext(), max_block_size, format_settings, max_parsing_threads, - std::nullopt, /* is_remote_fs */ true, compression_method); + configuration->format, + *read_buf, + read_from_format_info.format_header, + getContext(), + max_block_size, + format_settings, + need_only_count ? 
1 : max_parsing_threads, + std::nullopt, + true/* is_remote_fs */, + compression_method, + need_only_count); if (key_condition) input_format->setKeyCondition(key_condition); @@ -440,6 +448,19 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } +size_t StorageObjectStorageSource::GlobIterator::estimatedKeysCount() +{ + if (object_infos.empty() && !is_finished && object_storage_iterator->isValid()) + { + /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); + /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do + /// as it would lead to serious slow down of the execution, since objects are going + /// to be fetched sequentially rather than in-parallel with up to times. + return std::numeric_limits::max(); + } + return object_infos.size(); +} + StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) { std::lock_guard lock(next_mutex); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index fb0ad3e32f1..8dbb31fdfba 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -37,6 +37,7 @@ public: ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, + size_t max_parsing_threads_, bool need_only_count_); ~StorageObjectStorageSource() override; @@ -64,6 +65,7 @@ protected: const std::optional format_settings; const UInt64 max_block_size; const bool need_only_count; + const size_t max_parsing_threads; const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; @@ -165,12 +167,13 @@ public: ~GlobIterator() override = default; - size_t estimatedKeysCount() override { return object_infos.size(); } + size_t estimatedKeysCount() override; private: ObjectInfoPtr nextImpl(size_t processor) override; ObjectInfoPtr nextImplUnlocked(size_t processor); void createFilterAST(const String & any_key); + void fillBufferForKey(const std::string & uri_key); const ObjectStoragePtr object_storage; const ConfigurationPtr configuration; @@ -184,6 +187,8 @@ private: ActionsDAGPtr filter_dag; ObjectStorageIteratorPtr object_storage_iterator; bool recursive{false}; + std::vector expanded_keys; + std::vector::iterator expanded_keys_iter; std::unique_ptr matcher; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 867f22ef5fe..f8eb288921c 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -359,6 +359,7 @@ std::shared_ptr StorageS3Queue::createSource( local_context, max_block_size, file_iterator, + local_context->getSettingsRef().max_download_threads, false); auto file_deleter = [=, this](const std::string & path) mutable From dc749325df1fa7f4d686beddd7551c30b881a0fc Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 17:31:13 +0000 Subject: [PATCH 489/651] Faaaaaaaaaster --- src/Interpreters/Cache/QueryCache.cpp | 4 ++-- src/Interpreters/Cache/QueryCache.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index e30da7f233d..4b10bfd3dcd 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -203,7 +203,7 @@ String queryStringFromAST(ASTPtr ast) QueryCache::Key::Key( ASTPtr ast_, - 
String current_database, + const String & current_database, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, @@ -220,7 +220,7 @@ QueryCache::Key::Key( { } -QueryCache::Key::Key(ASTPtr ast_, String current_database, std::optional user_id_, const std::vector & current_user_roles_) +QueryCache::Key::Key(ASTPtr ast_, const String & current_database, std::optional user_id_, const std::vector & current_user_roles_) : QueryCache::Key(ast_, current_database, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c234ea3d464..b5b6f477137 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -88,7 +88,7 @@ public: /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, - String current_database, + const String & current_database, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, @@ -96,7 +96,7 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). - Key(ASTPtr ast_, String current_database, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, const String & current_database, std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; From 3ff53b8a0f5b62c7d64aaff263211ec060cd3ba7 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 19:38:30 +0200 Subject: [PATCH 490/651] Change double quotes in import --- src/Databases/DatabaseLazy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index c95d690f331..b5535ff2a74 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -10,7 +10,7 @@ #include #include #include -#include "Common/CurrentMetrics.h" +#include #include #include From 24805423544afd3e5c47a736f0da3e47dedac293 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 19:42:03 +0200 Subject: [PATCH 491/651] Order imports --- src/Databases/DatabaseLazy.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index b5535ff2a74..7b47a1a2423 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -1,3 +1,10 @@ +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -10,14 +17,7 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include namespace fs = std::filesystem; From 9f71988f01aa70acccac5e1c178f1cbcb8dc74ae Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 21 May 2024 17:44:40 +0000 Subject: [PATCH 492/651] Fix tests --- src/Columns/ColumnDynamic.h | 6 +++--- .../0_stateless/03039_dynamic_all_merge_algorithms_1.sh | 2 +- .../0_stateless/03039_dynamic_all_merge_algorithms_2.sh | 2 +- .../0_stateless/03151_dynamic_type_scale_max_types.sql | 3 +++ 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 40e8e350733..8aece765308 100644 --- 
a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -96,13 +96,13 @@ public: MutableColumnPtr cloneEmpty() const override { - /// Keep current dynamic structure. - return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics); + /// Keep current dynamic structure but not statistics. + return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types); } MutableColumnPtr cloneResized(size_t size) const override { - return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics); + return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types); } size_t size() const override diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh index 0941f2da369..9cfd2294c8d 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --optimize_aggregation_in_order 0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --optimize_aggregation_in_order 0 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" function test() diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh index f067a99ca19..02362012960 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" function test() diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql index 04322fc4f0c..632f3504fdb 100644 --- a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql @@ -1,4 +1,7 @@ SET allow_experimental_dynamic_type=1; +set min_compress_block_size = 585572, max_compress_block_size = 373374, max_block_size = 60768, max_joined_block_size_rows = 18966, max_insert_threads = 5, max_threads = 50, max_read_buffer_size = 708232, connect_timeout_with_failover_ms = 2000, connect_timeout_with_failover_secure_ms = 3000, idle_connection_timeout = 36000, use_uncompressed_cache = true, stream_like_engine_allow_direct_select = true, replication_wait_for_inactive_replica_timeout = 30, compile_aggregate_expressions = false, min_count_to_compile_aggregate_expression = 0, compile_sort_description = false, group_by_two_level_threshold = 1000000, group_by_two_level_threshold_bytes = 12610083, enable_memory_bound_merging_of_aggregation_results = false, min_chunk_bytes_for_parallel_parsing = 18769830, merge_tree_coarse_index_granularity = 12, min_bytes_to_use_direct_io = 10737418240, min_bytes_to_use_mmap_io = 10737418240, log_queries = true, insert_quorum_timeout = 60000, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.05000000074505806, http_response_buffer_size = 294986, fsync_metadata = true, http_send_timeout = 60., http_receive_timeout = 60., opentelemetry_start_trace_probability = 0.10000000149011612, max_bytes_before_external_group_by = 1, max_bytes_before_external_sort = 10737418240, max_bytes_before_remerge_sort = 1326536545, max_untracked_memory = 1048576, memory_profiler_step = 1048576, log_comment = '03151_dynamic_type_scale_max_types.sql', send_logs_level = 'fatal', prefer_localhost_replica = false, optimize_read_in_order = false, optimize_aggregation_in_order = true, aggregation_in_order_max_block_bytes = 27069500, read_in_order_two_level_merge_threshold = 75, allow_introspection_functions = true, database_atomic_wait_for_drop_and_detach_synchronously = true, remote_filesystem_read_method = 'read', local_filesystem_read_prefetch = true, remote_filesystem_read_prefetch = false, merge_tree_compact_parts_min_granules_to_multibuffer_read = 119, async_insert_busy_timeout_max_ms = 5000, read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true, filesystem_cache_segments_batch_size = 10, use_page_cache_for_disks_without_file_cache = true, page_cache_inject_eviction = true, allow_prefetched_read_pool_for_remote_filesystem = false, filesystem_prefetch_step_marks = 50, filesystem_prefetch_min_bytes_for_single_read_task = 16777216, filesystem_prefetch_max_memory_usage = 134217728, filesystem_prefetches_limit = 10, optimize_sorting_by_input_stream_properties = false, allow_experimental_dynamic_type = true, session_timezone = 'Africa/Khartoum', prefer_warmed_unmerged_parts_seconds = 2; + +drop table if exists to_table; CREATE TABLE to_table ( From 51afec49107864e97eb36f9e5760efd1e11bfea8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 21 May 2024 17:59:26 +0000 Subject: [PATCH 493/651] Fixing test. 
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 52cd6207dde..cfea45732db 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -637,6 +637,10 @@ struct ScopeAliases const std::string * key = &getKey(lookup.identifier, find_option); auto it = alias_map.find(*key); + + if (it == alias_map.end() && lookup.lookup_context == IdentifierLookupContext::TABLE_EXPRESSION) + return {}; + while (it == alias_map.end()) { auto jt = transitive_aliases.find(*key); @@ -4191,7 +4195,7 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook * In the example, identifier `id` should be resolved into one from USING (id) column. */ - auto alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); + auto * alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); //auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); if (alias_it && (*alias_it)->getNodeType() == QueryTreeNodeType::COLUMN) { From bedaaa38ef592efdd7da56d2c2a9a98b0424ffee Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 21 May 2024 21:03:13 +0200 Subject: [PATCH 494/651] Update test.py --- .../test_backup_restore_azure_blob_storage/test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index 92389dd1f24..78b186e3227 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -281,7 +281,10 @@ def test_backup_restore_on_merge_tree(cluster): node = cluster.instances["node"] azure_query( node, - f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy'", + f""" + DROP TABLE IF EXISTS test_simple_merge_tree; + CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy' + """, ) azure_query(node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')") @@ -306,6 +309,7 @@ def test_backup_restore_correct_block_ids(cluster): azure_query( node, f""" + DROP TABLE IF EXISTS test_simple_merge_tree; CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() From c9d29213d8e6af3569fef6be235f0074888a0261 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 21 May 2024 21:04:28 +0200 Subject: [PATCH 495/651] Update InterpreterCreateQuery.cpp --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 4fdd804452d..541717f1c04 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1493,7 +1493,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, validateVirtualColumns(*res); - if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()) 
&& mode <= LoadingStrictnessLevel::CREATE) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column of type Object, " From 42efc4e2f641b1abec484a36aa32b2cc97e6b49d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 21 May 2024 21:31:52 +0200 Subject: [PATCH 496/651] Pass column position to compact part writer --- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + .../MergeTree/IMergeTreeDataPartWriter.cpp | 4 +++- .../MergeTree/IMergeTreeDataPartWriter.h | 2 ++ .../MergeTree/MergeTreeDataPartCompact.cpp | 21 +++++++++---------- .../MergeTree/MergedBlockOutputStream.cpp | 8 +++---- .../MergedColumnOnlyOutputStream.cpp | 1 + 6 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index f4889d64179..15c8760141a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -186,6 +186,7 @@ public: /// take place, you must take original name of column for this part from /// storage and pass it to this method. std::optional getColumnPosition(const String & column_name) const; + const NameToNumber & getColumnPositions() const { return column_name_to_position; } /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()). /// If no checksums are present returns the name of the first physically existing column. diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 27da53de9b0..e8792be6293 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -115,6 +115,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, @@ -151,6 +152,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, @@ -162,7 +164,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( { if (part_type == MergeTreeDataPartType::Compact) return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, + index_granularity_info_, storage_settings_, columns_list, column_positions, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); else if (part_type == MergeTreeDataPartType::Wide) return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 5dcc7ddc599..8eb546c4f2c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ 
b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -69,6 +69,7 @@ protected: }; using MergeTreeDataPartWriterPtr = std::unique_ptr; +using ColumnPositions = std::unordered_map; MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( MergeTreeDataPartType part_type, @@ -79,6 +80,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 332b7d04f7f..98eda5573ce 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -55,6 +55,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, @@ -64,19 +65,17 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { -////// TODO: fix the order of columns -//// -//// NamesAndTypesList ordered_columns_list; -//// std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), -//// [this](const auto & column) { return getColumnPosition(column.name) != std::nullopt; }); -//// -//// /// Order of writing is important in compact format -//// ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) -//// { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); -//// + NamesAndTypesList ordered_columns_list; + std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), + [&column_positions](const auto & column) { return column_positions.contains(column.name); }); + + /// Order of writing is important in compact format + ordered_columns_list.sort([&column_positions](const auto & lhs, const auto & rhs) + { return column_positions.at(lhs.name) < column_positions.at(rhs.name); }); + return std::make_unique( data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, + index_granularity_info_, storage_settings_, ordered_columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 5ef967d930a..ee5c197336d 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -44,8 +44,6 @@ MergedBlockOutputStream::MergedBlockOutputStream( if (data_part->isStoredOnDisk()) data_part_storage->createDirectories(); -// /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. -// TransactionID tid = txn ? 
txn->tid : Tx::PrehistoricTID; /// NOTE do not pass context for writing to system.transactions_info_log, /// because part may have temporary name (with temporary block numbers). Will write it later. data_part->version.setCreationTID(tid, nullptr); @@ -55,7 +53,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), data_part_storage, data_part->index_granularity_info, storage_settings, - columns_list, metadata_snapshot, data_part->storage.getVirtualsPtr(), + columns_list, data_part->getColumnPositions(), metadata_snapshot, data_part->storage.getVirtualsPtr(), skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); } @@ -243,9 +241,9 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - if (auto file = new_part->partition.store(//storage, + if (auto file = new_part->partition.store( new_part->storage.getInMemoryMetadataPtr(), new_part->storage.getContext(), - new_part->getDataPartStorage(), checksums)) + new_part->getDataPartStorage(), checksums)) written_files.emplace_back(std::move(file)); if (new_part->minmax_idx->initialized) diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 1d1783b1b43..674a9bd498f 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -38,6 +38,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( data_part_storage, data_part->index_granularity_info, storage_settings, header.getNamesAndTypesList(), + data_part->getColumnPositions(), metadata_snapshot_, data_part->storage.getVirtualsPtr(), indices_to_recalc, From c10bafc094eb4b2913e191d2781d574ae05966b6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 20:39:35 +0000 Subject: [PATCH 497/651] Incorporate feedback --- .../functions/other-functions.md | 110 +++++++++++++----- 1 file changed, 84 insertions(+), 26 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 7c930d23919..95591a7f483 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1000,7 +1000,8 @@ If executed in the context of a distributed table, this function generates a nor ## blockNumber -Returns the sequence number of the [data block](https://clickhouse.com/docs/en/integrations/python#data-blocks) where the row is located. +Returns a monotonically increasing sequence number of the [block](../../development/architecture.md#block) containing the row. +The returned block number is updated on a best-effort basis, i.e. 
it may not be fully accurate **Syntax** @@ -1017,26 +1018,43 @@ blockNumber() Query: ```sql -SELECT groupUniqArray(blockNumber()) -FROM -( +SELECT blockNumber() +FROM ( SELECT * - FROM system.numbers_mt - LIMIT 100000 -) + FROM system.numbers + LIMIT 10 +) SETTINGS max_block_size = 2 ``` Result: ```response -┌─groupUniqArray(blockNumber())─┐ -│ [6,7] │ -└───────────────────────────────┘ +┌─blockNumber()─┐ +│ 7 │ +│ 7 │ +└───────────────┘ +┌─blockNumber()─┐ +│ 8 │ +│ 8 │ +└───────────────┘ +┌─blockNumber()─┐ +│ 9 │ +│ 9 │ +└───────────────┘ +┌─blockNumber()─┐ +│ 10 │ +│ 10 │ +└───────────────┘ +┌─blockNumber()─┐ +│ 11 │ +│ 11 │ +└───────────────┘ ``` ## rowNumberInBlock {#rowNumberInBlock} -Returns the ordinal number of the row in the [data block](https://clickhouse.com/docs/en/integrations/python#data-blocks). Different data blocks are always recalculated. +Returns for each [block](../../development/architecture.md#block) processed by `rowNumberInBlock` the number of the current row. +The returned number starts for each block at 0. **Syntax** @@ -1053,23 +1071,42 @@ rowNumberInBlock() Query: ```sql -SELECT - min(rowNumberInBlock()), - max(rowNumberInBlock()) -FROM (SELECT * FROM system.numbers_mt LIMIT 100000); +SELECT rowNumberInBlock() +FROM ( + SELECT * + FROM system.numbers_mt + LIMIT 10 +) SETTINGS max_block_size = 2 ``` Result: ```response -┌─min(rowNumberInBlock())─┬─max(rowNumberInBlock())─┐ -│ 0 │ 65408 │ -└─────────────────────────┴─────────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ +┌─rowNumberInBlock()─┐ +│ 0 │ +│ 1 │ +└────────────────────┘ ``` ## rowNumberInAllBlocks -Returns the ordinal number of the row in the [data block](https://clickhouse.com/docs/en/integrations/python#data-blocks). This function only considers the affected data blocks. +Returns a unique row number for each row processed by `rowNumberInAllBlocks`. The returned numbers start at 0. 
**Syntax** @@ -1086,18 +1123,39 @@ rowNumberInAllBlocks() Query: ```sql -SELECT - min(rowNumberInAllBlocks()), - max(rowNumberInAllBlocks()) -FROM (SELECT * FROM system.numbers_mt LIMIT 100000); +SELECT rowNumberInAllBlocks() +FROM +( + SELECT * + FROM system.numbers_mt + LIMIT 10 +) +SETTINGS max_block_size = 2 ``` Result: ```response -┌─min(rowNumberInAllBlocks())─┬─max(rowNumberInAllBlocks())─┐ -│ 0 │ 99999 │ -└─────────────────────────────┴─────────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 0 │ +│ 1 │ +└────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 4 │ +│ 5 │ +└────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 2 │ +│ 3 │ +└────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 6 │ +│ 7 │ +└────────────────────────┘ +┌─rowNumberInAllBlocks()─┐ +│ 8 │ +│ 9 │ +└────────────────────────┘ ``` From cd742dd040747f70ea92aedfded53565beea1253 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 20:41:20 +0000 Subject: [PATCH 498/651] Fix whitespace errors --- docs/en/sql-reference/functions/other-functions.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 95591a7f483..98cb5631553 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1001,7 +1001,7 @@ If executed in the context of a distributed table, this function generates a nor ## blockNumber Returns a monotonically increasing sequence number of the [block](../../development/architecture.md#block) containing the row. -The returned block number is updated on a best-effort basis, i.e. it may not be fully accurate +The returned block number is updated on a best-effort basis, i.e. it may not be fully accurate. 
 **Syntax**

@@ -1019,7 +1019,8 @@ Query:

 ```sql
 SELECT blockNumber()
-FROM (
+FROM
+(
     SELECT *
     FROM system.numbers
     LIMIT 10
@@ -1072,7 +1073,8 @@ Query:

 ```sql
 SELECT rowNumberInBlock()
-FROM (
+FROM
+(
     SELECT *
     FROM system.numbers_mt
     LIMIT 10

From bb0b135c3642d2972fddc9c4e4a584dd5e246f9f Mon Sep 17 00:00:00 2001
From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com>
Date: Wed, 22 May 2024 06:18:43 +0200
Subject: [PATCH 499/651] Do not decrement counter if table pointer is null in
 lazy database detachTable

---
 src/Databases/DatabaseLazy.cpp    | 34 ++++++--------------------
 src/Databases/DatabasesCommon.cpp | 40 +++++++------------------------
 src/Storages/Utils.cpp            | 28 ++++++++++++++++++++++
 src/Storages/Utils.h              |  7 ++++++
 4 files changed, 51 insertions(+), 58 deletions(-)
 create mode 100644 src/Storages/Utils.cpp
 create mode 100644 src/Storages/Utils.h

diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp
index 7b47a1a2423..c2fd184f8bc 100644
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@@ -1,3 +1,5 @@
+#include
+
 #include
 #include
 #include
@@ -7,7 +9,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -17,19 +18,12 @@
 #include
 #include
 #include
+#include

 namespace fs = std::filesystem;

-namespace CurrentMetrics
-{
-    extern const Metric
AttachedTable; - extern const Metric AttachedView; - extern const Metric AttachedDictionary; -} namespace DB @@ -254,23 +249,6 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, con return detachTableUnlocked(table_name); } - -static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) -{ - if (storage->isView()) - { - return CurrentMetrics::AttachedView; - } - else if (storage->isDictionary()) - { - return CurrentMetrics::AttachedDictionary; - } - else - { - return CurrentMetrics::AttachedTable; - } -} - StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_name) { StoragePtr res; @@ -282,7 +260,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - CurrentMetrics::sub(get_attached_count_metric_for_storage(res), 1); + CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -323,7 +301,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. table->is_detached = false; - CurrentMetrics::add(get_attached_count_metric_for_storage(table), 1); + CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp new file mode 100644 index 00000000000..670d6a242e8 --- /dev/null +++ b/src/Storages/Utils.cpp @@ -0,0 +1,28 @@ +#include +#include + +namespace CurrentMetrics +{ + extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; +} + +namespace DB { + + CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage) + { + if (storage->isView()) + { + return CurrentMetrics::AttachedView; + } + else if (storage->isDictionary()) + { + return CurrentMetrics::AttachedDictionary; + } + else + { + return CurrentMetrics::AttachedTable; + } + } +} diff --git a/src/Storages/Utils.h b/src/Storages/Utils.h new file mode 100644 index 00000000000..ffb8479d633 --- /dev/null +++ b/src/Storages/Utils.h @@ -0,0 +1,7 @@ +#include +#include + +namespace DB +{ + CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage); +} From 3f46e4e4305693c9542001fb9e718f2fb098a137 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 22 May 2024 04:35:06 +0000 Subject: [PATCH 500/651] better exception message in delete table with projection --- src/Interpreters/InterpreterDeleteQuery.cpp | 15 ++++++++++++++- src/Storages/IStorage.h | 3 +++ src/Storages/MergeTree/IMergeTreeDataPart.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 15 +++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 ++ .../03161_lightweight_delete_projection.reference | 0 .../03161_lightweight_delete_projection.sql | 15 +++++++++++++++ 7 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03161_lightweight_delete_projection.reference create mode 100644 tests/queries/0_stateless/03161_lightweight_delete_projection.sql diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index ee774994145..9cfb8e486cb 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int 
TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } @@ -107,7 +108,19 @@ BlockIO InterpreterDeleteQuery::execute() } else { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); + /// Currently just better exception for the case of a table with projection, + /// can act differently according to the setting. + if (table->hasProjection()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "DELETE query is not supported for table {}", + table->getStorageID().getFullTableName()); } } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 87a04c3fcc6..37613704c6a 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -259,6 +259,9 @@ public: /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage has any projection. + virtual bool hasProjection() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c380f99060e..f38a80455c4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -442,6 +442,8 @@ public: bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); } + bool hasProjection() const { return !projection_parts.empty(); } + bool hasBrokenProjection(const String & projection_name) const; /// Return true, if all projections were loaded successfully and none was marked as broken. 
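To make the user-facing effect concrete, a usage sketch (table and projection names follow the test added at the end of this patch; the workaround is a presumption based on the exception text):

```sql
-- With this patch, a lightweight DELETE on a table that has a projection fails
-- with NOT_IMPLEMENTED and an actionable message, instead of the generic
-- BAD_ARGUMENTS "DELETE query is not supported for table ..." error.
DELETE FROM users WHERE uid = 1231;   -- raises NOT_IMPLEMENTED

-- Presumed workaround, per the message: drop the projection(s) first.
ALTER TABLE users DROP PROJECTION p1;
DELETE FROM users WHERE uid = 1231;
```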
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 167160db317..1f7e0a19b3a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6133,6 +6133,21 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } +bool MergeTreeData::hasProjection() const +{ + auto lock = lockParts(); + for (const auto & part : data_parts_by_info) + { + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + + if (part->hasProjection()) + return true; + } + return false; +} + MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2f9283659e3..ff93c7c5ae4 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -438,6 +438,8 @@ public: bool supportsLightweightDelete() const override; + bool hasProjection() const override; + bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql new file mode 100644 index 00000000000..cd29fae8fd7 --- /dev/null +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -0,0 +1,15 @@ + +DROP TABLE IF EXISTS users; + +CREATE TABLE users ( + uid Int16, + name String, + age Int16, + projection p1 (select count(), age group by age) +) ENGINE = MergeTree order by uid; + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, 'Ksenia', 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } From 04de82e96524b88f168b5be18195863e1cf4b18b Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 06:35:25 +0200 Subject: [PATCH 501/651] Fix style --- src/Databases/DatabaseLazy.cpp | 3 ++- src/Storages/Utils.cpp | 2 +- src/Storages/Utils.h | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index c2fd184f8bc..b9c61400eb3 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -197,7 +197,8 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - if (res != nullptr) { + if (res != nullptr) + { CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); } } diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp index 670d6a242e8..df86ef15cff 100644 --- a/src/Storages/Utils.cpp +++ b/src/Storages/Utils.cpp @@ -25,4 +25,4 @@ namespace DB { return CurrentMetrics::AttachedTable; } } -} +} diff --git a/src/Storages/Utils.h b/src/Storages/Utils.h index ffb8479d633..3e92f6247c6 100644 --- a/src/Storages/Utils.h +++ 
b/src/Storages/Utils.h @@ -1,3 +1,5 @@ +#pragma once + #include #include From a8fe7294d2e39b00f24fce5077b2a3a6ae63bf01 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:06:19 +0200 Subject: [PATCH 502/651] Do not distinguish resource types for lazy database --- src/Databases/DatabaseLazy.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index b9c61400eb3..003943fbbe4 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -18,11 +18,15 @@ #include #include #include -#include namespace fs = std::filesystem; +namespace CurrentMetrics +{ + extern const Metric AttachedTable; +} + namespace DB { @@ -181,7 +185,8 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -197,10 +202,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - if (res != nullptr) - { - CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); - } + CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); } return res; } From 49529a1af9e15c1f3b6cda267034b93a48ce7e8a Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:18:17 +0200 Subject: [PATCH 503/651] Remove trailing whitespace --- src/Databases/DatabaseLazy.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 003943fbbe4..f0a56a0243d 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -186,7 +186,6 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); - } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) From 7be50ee90d688567a88152a324dc783369acde48 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:26:24 +0200 Subject: [PATCH 504/651] Add missing newline~ --- src/Databases/DatabaseLazy.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index f0a56a0243d..e72834eddbe 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -22,6 +22,7 @@ namespace fs = std::filesystem; + namespace CurrentMetrics { extern const Metric AttachedTable; From a0ad4a96c72525b0fb2e9ac9a8b70c88d847b56b Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:34:38 +0200 Subject: [PATCH 505/651] Add yet more missing newlines --- 
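As a reference for the counters that `getAttachedCounterForStorage` (introduced and polished by this and the preceding patches) maintains, they can be inspected at runtime via `system.metrics`:

```sql
SELECT metric, value
FROM system.metrics
WHERE metric IN ('AttachedTable', 'AttachedView', 'AttachedDictionary');
```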
src/Storages/Utils.cpp | 2 ++ src/Storages/Utils.h | 1 + 2 files changed, 3 insertions(+) diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp index df86ef15cff..b0c06f5ccf6 100644 --- a/src/Storages/Utils.cpp +++ b/src/Storages/Utils.cpp @@ -1,6 +1,7 @@ #include #include + namespace CurrentMetrics { extern const Metric AttachedTable; @@ -8,6 +9,7 @@ namespace CurrentMetrics extern const Metric AttachedDictionary; } + namespace DB { CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage) diff --git a/src/Storages/Utils.h b/src/Storages/Utils.h index 3e92f6247c6..c86c2a4c341 100644 --- a/src/Storages/Utils.h +++ b/src/Storages/Utils.h @@ -3,6 +3,7 @@ #include #include + namespace DB { CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage); From 8869094c9986906034f3368a2cdeee179a7976b1 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:42:05 +0200 Subject: [PATCH 506/651] Move opening brackets to their own lines --- src/Storages/Utils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp index b0c06f5ccf6..ff73888e19d 100644 --- a/src/Storages/Utils.cpp +++ b/src/Storages/Utils.cpp @@ -10,8 +10,8 @@ namespace CurrentMetrics } -namespace DB { - +namespace DB +{ CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage) { if (storage->isView()) From 89f26b56c659d3088288928353a4fbf612d4066b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 May 2024 19:25:37 +0200 Subject: [PATCH 507/651] Fix stripping heavy debug symbols in functions v2: remove resolving realpath Signed-off-by: Azat Khuzhin --- src/Functions/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 751e8cf5103..c52b00150ec 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -31,7 +31,7 @@ extract_into_parent_list(clickhouse_functions_headers dbms_headers add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources}) if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options(clickhouse_functions_obj PRIVATE "-g0") - set_source_files_properties(${DBMS_FUNCTIONS} PROPERTIES COMPILE_FLAGS "-g0") + set_source_files_properties(${DBMS_FUNCTIONS} DIRECTORY .. PROPERTIES COMPILE_FLAGS "-g0") endif() list (APPEND OBJECT_LIBS $) From cad1f1a1112bae7735ecb5c858c13baf53d0fdfd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 22 May 2024 07:50:45 +0200 Subject: [PATCH 508/651] Tune cpu limit for loongarch64 Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d51c1b242f..96ba2961d3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,8 +68,9 @@ if (ENABLE_CHECK_HEAVY_BUILDS) set (RLIMIT_AS 20000000000) endif() - # For some files currently building RISCV64 might be too slow. TODO: Improve compilation times per file - if (ARCH_RISCV64) + # For some files currently building RISCV64/LOONGARCH64 might be too slow.
+ # TODO: Improve compilation times per file + if (ARCH_RISCV64 OR ARCH_LOONGARCH64) set (RLIMIT_CPU 1800) endif() From 12ce276b8af09da46cb89ed9e2e15bb9ceff758a Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 22 May 2024 08:51:41 +0200 Subject: [PATCH 509/651] clang-tidy fix --- src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp | 8 ++++---- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 4 ++-- src/Storages/MergeTree/MergeTreeDataPartCompact.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index e8792be6293..891ba1b9660 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -52,7 +52,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) : data_part_name(data_part_name_) @@ -117,7 +117,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const NamesAndTypesList & columns_list, const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -134,7 +134,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -154,7 +154,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const NamesAndTypesList & columns_list, const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 8eb546c4f2c..f04beb37ebb 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -30,7 +30,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_ = {}); @@ -82,7 +82,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const NamesAndTypesList & columns_list, const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & 
stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 98eda5573ce..4a160e5e229 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -57,7 +57,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const NamesAndTypesList & columns_list, const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index d4630d3dd3f..149f86cef00 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -62,7 +62,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, From 58e655e07b128c4dfd26ffe60ad9d9ee285b3fa9 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 22 May 2024 07:24:42 +0000 Subject: [PATCH 510/651] Incorporate review feedback --- programs/keeper-client/Commands.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 3c649cad0d3..860840a2d06 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -10,8 +10,8 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int KEEPER_EXCEPTION; - extern const int UNEXPECTED_ZOOKEEPER_ERROR; } bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const @@ -442,7 +442,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient new_members = query->args[1].safeGet(); break; default: - throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected operation: {}", operation); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected operation: {}", operation); } auto response = client->zookeeper->reconfig(joining, leaving, new_members); From 376282dd6dce879008f0f0295402bc197d2b1e39 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 22 May 2024 09:58:31 +0200 Subject: [PATCH 511/651] Revert "Prevent conversion to Replicated if zookeeper path already exists" --- src/Databases/DatabaseOrdinary.cpp | 14 ---- .../configs/config.d/clusters.xml | 2 +- ...sters_zk_path.xml => clusters_unusual.xml} | 2 +- .../test_unusual_path.py | 6 +- .../test_zk_path.py | 69 ------------------- 5 files changed, 5 insertions(+), 88 deletions(-) rename tests/integration/test_modify_engine_on_restart/configs/config.d/{clusters_zk_path.xml => clusters_unusual.xml} (80%) delete mode 100644 tests/integration/test_modify_engine_on_restart/test_zk_path.py diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 58fa7f01947..5d36f1cc3d6 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -76,20 +76,6 @@ static void 
setReplicatedEngine(ASTCreateQuery * create_query, ContextPtr contex String replica_path = server_settings.default_replica_path; String replica_name = server_settings.default_replica_name; - /// Check that replica path doesn't exist - Macros::MacroExpansionInfo info; - StorageID table_id = StorageID(create_query->getDatabase(), create_query->getTable(), create_query->uuid); - info.table_id = table_id; - info.expand_special_macros_only = false; - - String zookeeper_path = context->getMacros()->expand(replica_path, info); - if (context->getZooKeeper()->exists(zookeeper_path)) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Found existing ZooKeeper path {} while trying to convert table {} to replicated. Table will not be converted.", - zookeeper_path, backQuote(table_id.getFullTableName()) - ); - auto args = std::make_shared(); args->children.push_back(std::make_shared(replica_path)); args->children.push_back(std::make_shared(replica_name)); diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml index c8bbb7f3530..d3a9d4fb8f0 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml @@ -19,4 +19,4 @@ 01 - + \ No newline at end of file diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml similarity index 80% rename from tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml rename to tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml index ba13cd87031..812291335b8 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml @@ -15,6 +15,6 @@ 01 -/clickhouse/'/{database}/{table}/{uuid} +/lol/kek/'/{uuid} diff --git a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py b/tests/integration/test_modify_engine_on_restart/test_unusual_path.py index 20d2c29257b..e82f48e8b34 100644 --- a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py +++ b/tests/integration/test_modify_engine_on_restart/test_unusual_path.py @@ -6,7 +6,7 @@ cluster = ClickHouseCluster(__file__) ch1 = cluster.add_instance( "ch1", main_configs=[ - "configs/config.d/clusters_zk_path.xml", + "configs/config.d/clusters_unusual.xml", "configs/config.d/distributed_ddl.xml", ], with_zookeeper=True, @@ -63,7 +63,7 @@ def check_tables(): ) .strip() .startswith( - "ReplicatedReplacingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', D)" + "ReplicatedReplacingMergeTree(\\'/lol/kek/\\\\\\'/{uuid}\\', \\'{replica}\\', D)" ) ) assert ( @@ -73,7 +73,7 @@ def check_tables(): ) .strip() .startswith( - "ReplicatedVersionedCollapsingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', Sign, Version)" + "ReplicatedVersionedCollapsingMergeTree(\\'/lol/kek/\\\\\\'/{uuid}\\', \\'{replica}\\', Sign, Version)" ) ) diff --git a/tests/integration/test_modify_engine_on_restart/test_zk_path.py b/tests/integration/test_modify_engine_on_restart/test_zk_path.py deleted file mode 100644 index dd633ad0810..00000000000 --- a/tests/integration/test_modify_engine_on_restart/test_zk_path.py +++ /dev/null @@ -1,69 +0,0 @@ 
-import pytest -from test_modify_engine_on_restart.common import ( - get_table_path, - set_convert_flags, -) -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -ch1 = cluster.add_instance( - "ch1", - main_configs=[ - "configs/config.d/clusters_zk_path.xml", - "configs/config.d/distributed_ddl.xml", - ], - with_zookeeper=True, - macros={"replica": "node1"}, - stay_alive=True, -) - -database_name = "modify_engine_zk_path" - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def q(node, query): - return node.query(database=database_name, sql=query) - - -def test_modify_engine_fails_if_zk_path_exists(started_cluster): - ch1.query("CREATE DATABASE " + database_name) - - q( - ch1, - "CREATE TABLE already_exists_1 ( A Int64, D Date, S String ) ENGINE MergeTree() PARTITION BY toYYYYMM(D) ORDER BY A;", - ) - uuid = q( - ch1, - f"SELECT uuid FROM system.tables WHERE table = 'already_exists_1' and database = '{database_name}'", - ).strip("'[]\n") - - q( - ch1, - f"CREATE TABLE already_exists_2 ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/\\'/{database_name}/already_exists_1/{uuid}', 'r2') PARTITION BY toYYYYMM(D) ORDER BY A;", - ) - - set_convert_flags(ch1, database_name, ["already_exists_1"]) - - table_data_path = get_table_path(ch1, "already_exists_1", database_name) - - ch1.stop_clickhouse() - ch1.start_clickhouse(retry_start=False, expected_to_fail=True) - - # Check if we can cancel convertation - ch1.exec_in_container( - [ - "bash", - "-c", - f"rm {table_data_path}convert_to_replicated", - ] - ) - ch1.start_clickhouse() From 7f46eae7b4961b3d58e2d592bc42ba5a32297f7c Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 22 May 2024 11:31:01 +0200 Subject: [PATCH 512/651] clang-tidy fix --- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterWide.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 328e3118ba9..2d86e0f0770 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -18,7 +18,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index f62f060fde2..ebf96c1ebb2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -19,7 +19,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & 
virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 30f01c1acd6..0a8920790e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -148,7 +148,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeIndices & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index a60fcd43a58..0c31cabc8c4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -109,7 +109,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 001f09b81b3..9df6cc5e2f7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -84,7 +84,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 8dc488788c6..63205775c58 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -29,7 +29,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, From 8dd52a26257a9dc11723e5a87507f6815f4fb818 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 May 2024 18:42:14 +0200 Subject: [PATCH 513/651] Ignore allow_suspicious_primary_key on ATTACH and verify on ALTER Signed-off-by: Azat Khuzhin Co-authored-by: Alexander Tokmakov --- src/Storages/MergeTree/MergeTreeData.cpp | 12 +++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 ++ .../MergeTree/registerStorageMergeTree.cpp | 20 ++++-------------- src/Storages/StorageMergeTree.cpp | 8 +++++-- src/Storages/StorageReplicatedMergeTree.cpp | 12 +++++++++-- ...03020_order_by_SimpleAggregateFunction.sql | 21 ++++++++++++++++--- 6 files changed, 52 insertions(+), 
23 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 527dac01b71..13d59d671ea 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -188,6 +189,7 @@ namespace ErrorCodes extern const int CANNOT_SCHEDULE_TASK; extern const int LIMIT_EXCEEDED; extern const int CANNOT_FORGET_PARTITION; + extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; } static void checkSuspiciousIndices(const ASTFunction * index_function) @@ -8538,6 +8540,16 @@ void MergeTreeData::unloadPrimaryKeys() } } +void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) +{ + /// Aggregate functions already forbidden, but SimpleAggregateFunction are not + for (const auto & data_type : sorting_key.data_types) + { + if (dynamic_cast(data_type->getCustomName())) + throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY, "Column with type {} is not allowed in key expression", data_type->getCustomName()->getName()); + } +} + bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove) { for (const auto & command : commands) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2f9283659e3..062f967bb93 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -736,6 +736,8 @@ public: const ASTPtr & new_settings, AlterLockHolder & table_lock_holder); + static void verifySortingKey(const KeyDescription & sorting_key); + /// Should be called if part data is suspected to be corrupted. /// Has the ability to check all other parts /// which reside on the same disk of the suspicious part. diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 4244ccccfe0..d234103e52b 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include @@ -32,7 +31,6 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int NO_REPLICA_NAME_GIVEN; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; } @@ -113,16 +111,6 @@ static ColumnsDescription getColumnsDescriptionFromZookeeper(const String & raw_ return ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_path) / "columns", &columns_stat)); } -static void verifySortingKey(const KeyDescription & sorting_key) -{ - /// Aggregate functions already forbidden, but SimpleAggregateFunction are not - for (const auto & data_type : sorting_key.data_types) - { - if (dynamic_cast(data_type->getCustomName())) - throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY, "Column with type {} is not allowed in key expression", data_type->getCustomName()->getName()); - } -} - /// Returns whether a new syntax is used to define a table engine, i.e. MergeTree() PRIMARY KEY ... PARTITION BY ... SETTINGS ... /// instead of MergeTree(MergeTree(date, [sample_key], primary_key). static bool isExtendedStorageDef(const ASTCreateQuery & query) @@ -678,8 +666,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// column if sorting key will be changed. 
metadata.sorting_key = KeyDescription::getSortingKeyFromAST( args.storage_def->order_by->ptr(), metadata.columns, context, merging_param_key_arg); - if (!local_settings.allow_suspicious_primary_key) - verifySortingKey(metadata.sorting_key); + if (!local_settings.allow_suspicious_primary_key && args.mode <= LoadingStrictnessLevel::CREATE) + MergeTreeData::verifySortingKey(metadata.sorting_key); /// If primary key explicitly defined, than get it from AST if (args.storage_def->primary_key) @@ -792,8 +780,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// column if sorting key will be changed. metadata.sorting_key = KeyDescription::getSortingKeyFromAST(engine_args[arg_num], metadata.columns, context, merging_param_key_arg); - if (!local_settings.allow_suspicious_primary_key) - verifySortingKey(metadata.sorting_key); + if (!local_settings.allow_suspicious_primary_key && args.mode <= LoadingStrictnessLevel::CREATE) + MergeTreeData::verifySortingKey(metadata.sorting_key); /// In old syntax primary_key always equals to sorting key. metadata.primary_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, context); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9144ef7c0f7..ea698775298 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -333,17 +333,21 @@ void StorageMergeTree::alter( auto table_id = getStorageID(); auto old_storage_settings = getSettings(); + const auto & query_settings = local_context->getSettingsRef(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); - auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, local_context->getSettingsRef().materialize_ttl_after_modify, local_context); + auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, query_settings.materialize_ttl_after_modify, local_context); if (!maybe_mutation_commands.empty()) delayMutationOrThrowIfNeeded(nullptr, local_context); Int64 mutation_version = -1; commands.apply(new_metadata, local_context); + if (!query_settings.allow_suspicious_primary_key) + MergeTreeData::verifySortingKey(new_metadata.sorting_key); + /// This alter can be performed at new_metadata level only if (commands.isSettingsAlter()) { @@ -396,7 +400,7 @@ void StorageMergeTree::alter( resetObjectColumnsFromActiveParts(parts_lock); } - if (!maybe_mutation_commands.empty() && local_context->getSettingsRef().alter_sync > 0) + if (!maybe_mutation_commands.empty() && query_settings.alter_sync > 0) waitForMutation(mutation_version, false); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 378b81c6d18..e0a24ceac4d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6027,6 +6027,7 @@ void StorageReplicatedMergeTree::alter( assertNotReadonly(); auto table_id = getStorageID(); + const auto & query_settings = query_context->getSettingsRef(); if (commands.isSettingsAlter()) { @@ -6054,6 +6055,13 @@ void StorageReplicatedMergeTree::alter( return; } + if (!query_settings.allow_suspicious_primary_key) + { + StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); + commands.apply(future_metadata, query_context); + + MergeTreeData::verifySortingKey(future_metadata.sorting_key); + } auto ast_to_str = [](ASTPtr query) -> String { @@ -6186,7 +6194,7 @@ void StorageReplicatedMergeTree::alter( auto 
maybe_mutation_commands = commands.getMutationCommands( *current_metadata, - query_context->getSettingsRef().materialize_ttl_after_modify, + query_settings.materialize_ttl_after_modify, query_context); bool have_mutation = !maybe_mutation_commands.empty(); @@ -6309,7 +6317,7 @@ void StorageReplicatedMergeTree::alter( { LOG_DEBUG(log, "Metadata changes applied. Will wait for data changes."); merge_selecting_task->schedule(); - waitMutation(*mutation_znode, query_context->getSettingsRef().alter_sync); + waitMutation(*mutation_znode, query_settings.alter_sync); LOG_DEBUG(log, "Data changes applied."); } } diff --git a/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql b/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql index f1727cb9e5c..fee42d1abc6 100644 --- a/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql +++ b/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql @@ -1,6 +1,6 @@ set allow_suspicious_primary_key = 0; -DROP TABLE IF EXISTS data; +drop table if exists data; create table data (key Int, value AggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } @@ -12,7 +12,22 @@ create table data (key Int, value AggregateFunction(sum, UInt64)) engine=Aggrega create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() primary key value order by (value, key); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } set allow_suspicious_primary_key = 1; - create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() primary key value order by (value, key); -DROP TABLE data; +-- ATTACH should work regardless allow_suspicious_primary_key +set allow_suspicious_primary_key = 0; +detach table data; +attach table data; +drop table data; + +-- ALTER AggregatingMergeTree +create table data (key Int) engine=AggregatingMergeTree() order by (key); +alter table data add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } +alter table data add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value) settings allow_suspicious_primary_key=1; +drop table data; + +-- ALTER ReplicatedAggregatingMergeTree +create table data_rep (key Int) engine=ReplicatedAggregatingMergeTree('/tables/{database}', 'r1') order by (key); +alter table data_rep add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } +alter table data_rep add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value) settings allow_suspicious_primary_key=1; +drop table data_rep; From d5d8d689748fbc125c37381fd9680c32468e07d0 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 22 May 2024 13:06:56 +0200 Subject: [PATCH 514/651] Remove unused storage_snapshot field --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeSelectProcessor.cpp | 2 -- src/Storages/MergeTree/MergeTreeSelectProcessor.h | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 6f0fa55c349..503031eb04b 100644 --- 
a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -381,7 +381,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -480,7 +480,7 @@ Pipe ReadFromMergeTree::readFromPool( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -592,7 +592,7 @@ Pipe ReadFromMergeTree::readInOrder( algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size, reader_settings); processor->addPartLevelToChunk(isQueryWithFinal()); diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index fce733d47b7..78b67de1a7e 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -26,14 +26,12 @@ namespace ErrorCodes MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, const MergeTreeReaderSettings & reader_settings_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) - , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 6b663e0fd36..8f41f5deacb 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -41,7 +41,6 @@ public: MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, @@ -71,7 +70,6 @@ private: const MergeTreeReadPoolPtr pool; const MergeTreeSelectAlgorithmPtr algorithm; - const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; From 5f01b14e0dc2f9a96d1c06cd2f9fb0112209ab59 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 22 May 2024 12:00:29 +0200 Subject: [PATCH 515/651] add prints --- tests/ci/ci.py | 4 ++-- tests/ci/ci_metadata.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 046550c62f8..40f5617f165 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1912,9 +1912,9 @@ def _get_ext_check_name(check_name: str) -> str: def _cancel_pr_wf(s3: S3Helper, pr_number: int) -> None: - run_id = CiMetadata(s3, pr_number).run_id + run_id = CiMetadata(s3, 
pr_number).fetch_meta().run_id if not run_id: - print("ERROR: FIX IT: Run id has not been found!") + print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") else: print(f"Canceling PR workflow run_id: [{run_id}], pr: [{pr_number}]") GitHub.cancel_wf(run_id) diff --git a/tests/ci/ci_metadata.py b/tests/ci/ci_metadata.py index 5856e9a8501..82d44cf1adc 100644 --- a/tests/ci/ci_metadata.py +++ b/tests/ci/ci_metadata.py @@ -6,6 +6,7 @@ from env_helper import ( TEMP_PATH, ) from s3_helper import S3Helper +from ci_utils import GHActions # pylint: disable=too-many-lines @@ -83,7 +84,10 @@ class CiMetadata: Uploads meta on s3 """ assert self.run_id - print("Storing workflow meta on s3") + GHActions.print_in_group( + f"Storing workflow metadata: PR [{self.pr_number}]", + [f"run_id: {self.run_id}"], + ) local_file = self._LOCAL_PATH / self._FILENAME_RUN_ID with open(local_file, "w", encoding="utf-8") as file: From 5c47b091144e24ee1fbd6627186e7965c9ad233e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 22 May 2024 13:18:51 +0200 Subject: [PATCH 516/651] Ignore text_log for Keeper --- programs/keeper/Keeper.cpp | 5 +++++ programs/keeper/Keeper.h | 2 ++ src/Loggers/Loggers.cpp | 2 +- src/Loggers/Loggers.h | 4 ++++ src/Loggers/OwnSplitChannel.cpp | 9 +++++---- src/Loggers/OwnSplitChannel.h | 1 - 6 files changed, 17 insertions(+), 6 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 267b725b02b..dba5c2b7d2a 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -182,6 +182,11 @@ std::string Keeper::getDefaultConfigFileName() const return "keeper_config.xml"; } +bool Keeper::allowTextLog() const +{ + return false; +} + void Keeper::handleCustomArguments(const std::string & arg, [[maybe_unused]] const std::string & value) // NOLINT { if (arg == "force-recovery") diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h index f889ffa595b..c449c40b610 100644 --- a/programs/keeper/Keeper.h +++ b/programs/keeper/Keeper.h @@ -65,6 +65,8 @@ protected: std::string getDefaultConfigFileName() const override; + bool allowTextLog() const override; + private: Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 4b17469f4d7..0bd4b94d999 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -263,7 +263,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } #ifndef WITHOUT_TEXT_LOG - if (config.has("text_log")) + if (allowTextLog() && config.has("text_log")) { String text_log_level_str = config.getString("text_log.level", "trace"); int text_log_level = Poco::Logger::parseLevel(text_log_level_str); diff --git a/src/Loggers/Loggers.h b/src/Loggers/Loggers.h index 9eff731a4c5..9923d66ebcb 100644 --- a/src/Loggers/Loggers.h +++ b/src/Loggers/Loggers.h @@ -23,6 +23,10 @@ public: /// Close log files. On next log write files will be reopened. 
void closeLogs(Poco::Logger & logger); + virtual ~Loggers() = default; + +protected: + virtual bool allowTextLog() const { return true; } private: Poco::AutoPtr log_file; diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index fee33781c27..dc51a13e01f 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -107,6 +107,10 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) [[maybe_unused]] bool push_result = logs_queue->emplace(std::move(columns)); } + auto text_log_locked = text_log.lock(); + if (!text_log_locked) + return; + /// Also log to system.text_log table, if message is not too noisy auto text_log_max_priority_loaded = text_log_max_priority.load(std::memory_order_relaxed); if (text_log_max_priority_loaded && msg.getPriority() <= text_log_max_priority_loaded) @@ -146,10 +150,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) #undef SET_VALUE_IF_EXISTS - std::shared_ptr> text_log_locked{}; - text_log_locked = text_log.lock(); - if (text_log_locked) - text_log_locked->push(std::move(elem)); + text_log_locked->push(std::move(elem)); } #endif } diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index b75554eefc4..7ca27cf6584 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include From 03fc077be7d8576c4e3e550842f2fd7c6d06a78f Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 22 May 2024 14:12:37 +0200 Subject: [PATCH 517/651] Fxi --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/ReadBufferIterator.cpp | 6 +++--- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 1 - src/Storages/ObjectStorage/StorageObjectStorageSource.cpp | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index d18468411ea..c07313b52db 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -575,7 +575,7 @@ void S3ObjectStorage::applyNewSettings( ContextPtr context, const ApplyNewSettingsOptions & options) { - auto new_s3_settings = getSettings(config, config_prefix, context); + auto new_s3_settings = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); if (!static_headers.empty()) { new_s3_settings->auth_settings.headers.insert( diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index e065de16e55..5a8a4735fe1 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -145,7 +145,7 @@ std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() auto context = getContext(); const auto & path = current_object_info->isArchive() ? 
current_object_info->getPathToArchive() : current_object_info->getPath(); - auto impl = object_storage->readObject(StoredObject(), context->getReadSettings()); + auto impl = object_storage->readObject(StoredObject(path), context->getReadSettings()); const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); const auto zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); @@ -258,10 +258,10 @@ ReadBufferIterator::Data ReadBufferIterator::next() std::unique_ptr read_buf; CompressionMethod compression_method; using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; - if (auto object_info_in_archive = dynamic_cast(current_object_info.get())) + if (const auto * object_info_in_archive = dynamic_cast(current_object_info.get())) { compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); - auto & archive_reader = object_info_in_archive->archive_reader; + const auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 5de7f41b4f7..2c8e60b49d0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 8d946f515a3..a2b3ca5b69e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -279,10 +279,10 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade else { CompressionMethod compression_method; - if (auto object_info_in_archive = dynamic_cast(object_info.get())) + if (const auto * object_info_in_archive = dynamic_cast(object_info.get())) { compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); - auto & archive_reader = object_info_in_archive->archive_reader; + const auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else From 6942ae0c1e6204d8ee91b8e69e88be85ec289620 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 13 May 2024 12:00:52 +0000 Subject: [PATCH 518/651] Fix SimpleSquashingChunksTransform (02115_rewrite_local_join_right_distribute_table) --- src/Processors/IInflatingTransform.cpp | 22 +++++--- src/Processors/IInflatingTransform.h | 8 ++- .../Transforms/ArrayJoinTransform.cpp | 4 +- .../Transforms/ArrayJoinTransform.h | 2 +- .../Transforms/SquashingChunksTransform.cpp | 52 +++++++------------ .../Transforms/SquashingChunksTransform.h | 12 ++--- ...rite_local_join_right_distribute_table.sql | 4 -- 7 files changed, 48 insertions(+), 56 deletions(-) diff --git a/src/Processors/IInflatingTransform.cpp b/src/Processors/IInflatingTransform.cpp index ffa5b55dc76..bc0b3e8459e 100644 --- a/src/Processors/IInflatingTransform.cpp +++ b/src/Processors/IInflatingTransform.cpp @@ -45,8 +45,13 @@ IInflatingTransform::Status IInflatingTransform::prepare() { if (input.isFinished()) { - output.finish(); - return 
Status::Finished; + if (is_finished) + { + output.finish(); + return Status::Finished; + } + is_finished = true; + return Status::Ready; } input.setNeeded(); @@ -71,16 +76,17 @@ void IInflatingTransform::work() current_chunk = generate(); generated = true; - can_generate = canGenerate(); + can_generate = canGenerate(is_finished); } else { - if (!has_input) - throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot consume chunk because it wasn't read"); + if (has_input) + { + consume(std::move(current_chunk)); + has_input = false; + } - consume(std::move(current_chunk)); - has_input = false; - can_generate = canGenerate(); + can_generate = canGenerate(is_finished); } } diff --git a/src/Processors/IInflatingTransform.h b/src/Processors/IInflatingTransform.h index 0ad12f6cd65..3f832b0e5bc 100644 --- a/src/Processors/IInflatingTransform.h +++ b/src/Processors/IInflatingTransform.h @@ -10,13 +10,14 @@ namespace DB /// for (chunk : input_chunks) /// { /// transform.consume(chunk); -/// /// while (transform.canGenerate()) /// { /// transformed_chunk = transform.generate(); /// ... (process transformed chunk) /// } /// } +/// while (transform.canGenerate(true)) +/// ... (process remaining data) /// class IInflatingTransform : public IProcessor { @@ -30,7 +31,7 @@ protected: bool can_generate = false; virtual void consume(Chunk chunk) = 0; - virtual bool canGenerate() = 0; + virtual bool canGenerate(bool is_read_finished) = 0; virtual Chunk generate() = 0; public: @@ -41,6 +42,9 @@ public: InputPort & getInputPort() { return input; } OutputPort & getOutputPort() { return output; } + + /// canGenerate can flush data when input is finished. + bool is_finished = false; }; } diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp index 1304434d74e..b7a6ba85963 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.cpp +++ b/src/Processors/Transforms/ArrayJoinTransform.cpp @@ -38,14 +38,14 @@ void ArrayJoinTransform::consume(Chunk chunk) } -bool ArrayJoinTransform::canGenerate() +bool ArrayJoinTransform::canGenerate(bool) { return result_iterator && result_iterator->hasNext(); } Chunk ArrayJoinTransform::generate() { - if (!canGenerate()) + if (!canGenerate(false)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in ArrayJoinTransform"); auto block = result_iterator->next(); diff --git a/src/Processors/Transforms/ArrayJoinTransform.h b/src/Processors/Transforms/ArrayJoinTransform.h index 4219135982d..de291a0422f 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.h +++ b/src/Processors/Transforms/ArrayJoinTransform.h @@ -26,7 +26,7 @@ public: protected: void consume(Chunk chunk) override; - bool canGenerate() override; + bool canGenerate(bool is_read_finished) override; Chunk generate() override; private: diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 0d69b6e0a8d..b79987161fd 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -56,49 +56,35 @@ void SquashingChunksTransform::work() SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, true), squashing(min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void 
SimpleSquashingChunksTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - chunk.setColumns(block.getColumns(), block.rows()); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); - } + current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); } -IProcessor::Status SimpleSquashingChunksTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) - { - if (output.isFinished()) - return Status::Finished; + if (!current_block) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - if (!output.canPush()) - return Status::PortFull; + Chunk result(current_block.getColumns(), current_block.rows()); + current_block.clear(); + return result; +} - if (has_output) - { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; - } - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; - } - return ISimpleTransform::prepare(); +bool SimpleSquashingChunksTransform::canGenerate(bool is_read_finished) +{ + if (current_block) + return true; + + if (is_read_finished) + current_block = squashing.add({}); + + return bool(current_block); } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f82e9e46a61..d0316c39a43 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -29,7 +30,7 @@ private: }; /// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. -class SimpleSquashingChunksTransform : public ISimpleTransform +class SimpleSquashingChunksTransform : public IInflatingTransform { public: explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); @@ -37,14 +38,13 @@ public: String getName() const override { return "SimpleSquashingTransform"; } protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; + void consume(Chunk chunk) override; + bool canGenerate(bool is_read_finished) override; + Chunk generate() override; private: SquashingTransform squashing; - /// When consumption is finished we need to release the final chunk regardless of its size. 
- bool finished = false; + Block current_block; }; } diff --git a/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql b/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql index 2ab324df787..d5ab82ba064 100644 --- a/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql +++ b/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql @@ -23,10 +23,6 @@ select t1.* from t1_all t1 join t2_all t2 on t1.a = t2.a ORDER BY t1.a; SELECT '-'; --- make sure data is fully written when reading from distributed -optimize table t1_local final; -optimize table t2_local final; - set distributed_product_mode = 'global'; select * from t1_all t1 where t1.a in (select t2.a from t2_all t2); explain syntax select t1.* from t1_all t1 join t2_all t2 on t1.a = t2.a; From 3f4f253c39b7118aab95b20af900d79cf1065cad Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 20 May 2024 08:09:55 +0000 Subject: [PATCH 519/651] Enable keep_free_space_bytes for metadata storage --- .../ObjectStorages/MetadataStorageFactory.cpp | 4 ++- ...02963_test_flexible_disk_configuration.sql | 26 +++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index 4a3e8a37d28..ab7c2069b43 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -99,8 +99,10 @@ void registerMetadataStorageFromDisk(MetadataStorageFactory & factory) { auto metadata_path = config.getString(config_prefix + ".metadata_path", fs::path(Context::getGlobalContextInstance()->getPath()) / "disks" / name / ""); + auto metadata_keep_free_space_bytes = config.getUInt64(config_prefix + ".metadata_keep_free_space_bytes", 0); + fs::create_directories(metadata_path); - auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0, config, config_prefix); + auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, metadata_keep_free_space_bytes, config, config_prefix); auto key_compatibility_prefix = getObjectKeyCompatiblePrefix(*object_storage, config, config_prefix); return std::make_shared(metadata_disk, key_compatibility_prefix); }); diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql index 552291b2f83..8f67cd7e030 100644 --- a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql +++ b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql @@ -30,6 +30,28 @@ settings disk=disk(name='test2', drop table test; create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test3', + type = object_storage, + object_storage_type = s3, + metadata_storage_type = local, + metadata_keep_free_space_bytes = 1024, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); +drop table test; + +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test4', + type = object_storage, + object_storage_type = s3, + metadata_storage_type = local, + metadata_keep_free_space_bytes = 0, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); +drop table test; + +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test5', type = 
object_storage, object_storage_type = s3, metadata_type = lll, @@ -38,7 +60,7 @@ settings disk=disk(name='test3', secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } create table test (a Int32) engine = MergeTree() order by tuple() -settings disk=disk(name='test4', +settings disk=disk(name='test6', type = object_storage, object_storage_type = kkk, metadata_type = local, @@ -47,7 +69,7 @@ settings disk=disk(name='test4', secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } create table test (a Int32) engine = MergeTree() order by tuple() -settings disk=disk(name='test5', +settings disk=disk(name='test7', type = kkk, object_storage_type = s3, metadata_type = local, From e055de32bedb80dff96bd0f8809e967dafe1c0cb Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 20 May 2024 08:11:48 +0000 Subject: [PATCH 520/651] Add docs --- docs/en/operations/storing-data.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 9b316960750..53ecd66396d 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -421,6 +421,7 @@ Other parameters: * `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. * `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). * `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +* `metadata_keep_free_space_bytes` - the amount of free metadata disk space to be reserved. Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). 
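Note on patches 519/520 above: metadata_keep_free_space_bytes is read with a default of 0 and forwarded to the local metadata disk as its keep-free-space reservation, so configurations that do not set it keep the old behavior (the test's metadata_keep_free_space_bytes = 0 case pins that down). A minimal sketch of what such a reservation check amounts to (illustrative only; the helper name and the use of std::filesystem are assumptions, not the actual DiskLocal code):

    #include <cstdint>
    #include <filesystem>

    // How many bytes a disk may still hand out once the configured
    // keep-free reservation is honored (hypothetical helper, not the
    // real ClickHouse implementation).
    uint64_t unreservedSpace(const std::filesystem::path & disk_path, uint64_t keep_free_space_bytes)
    {
        const std::filesystem::space_info info = std::filesystem::space(disk_path);
        // Anything below the reservation threshold counts as unavailable.
        return info.available > keep_free_space_bytes ? info.available - keep_free_space_bytes : 0;
    }

The design point is simply that the reservation is subtracted before capacity is reported, so writers start failing while the configured headroom is still intact.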
From 6e605030d14d1ddba62d97d42a47067d08a78d8b Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Tue, 21 May 2024 11:55:39 +0000 Subject: [PATCH 521/651] Trigger Ci From b899bd07cfdee3a2919583482c0da2354bbb348a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 22 May 2024 16:12:33 +0200 Subject: [PATCH 522/651] Better --- utils/keeper-bench/Runner.cpp | 90 +++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 36 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index a625a7f157d..ed7e09685f0 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -4,30 +4,28 @@ #include #include +#include #include -#include "Common/ConcurrentBoundedQueue.h" -#include "Common/Exception.h" -#include "Common/ZooKeeper/IKeeper.h" -#include "Common/ZooKeeper/ZooKeeperArgs.h" -#include "Common/ZooKeeper/ZooKeeperCommon.h" -#include "Common/ZooKeeper/ZooKeeperConstants.h" -#include -#include -#include "Coordination/KeeperSnapshotManager.h" -#include "Core/ColumnWithTypeAndName.h" -#include "Core/ColumnsWithTypeAndName.h" +#include +#include #include -#include "IO/ReadBuffer.h" -#include "IO/ReadBufferFromFile.h" -#include "base/Decimal.h" -#include "base/types.h" -#include +#include +#include +#include +#include #include #include #include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace CurrentMetrics @@ -884,6 +882,7 @@ struct SetupNodeCollector if (initial_storage->container.contains(path)) return; + new_nodes = true; std::cerr << "Adding expected node " << path << std::endl; Coordination::Requests create_ops; @@ -923,11 +922,19 @@ struct SetupNodeCollector void generateSnapshot() { - std::cerr << "Generating snapshot with starting data" << std::endl; std::lock_guard lock(nodes_mutex); + if (!new_nodes) + { + std::cerr << "No new nodes added" << std::endl; + return; + } + + std::cerr << "Generating snapshot with starting data" << std::endl; DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(initial_storage->getZXID(), 1, std::make_shared()); DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); snapshot_manager->serializeSnapshotToDisk(snapshot); + + new_nodes = false; } std::mutex nodes_mutex; @@ -935,6 +942,7 @@ struct SetupNodeCollector Coordination::KeeperStoragePtr initial_storage; std::unordered_set nodes_created_during_replay; std::optional snapshot_manager; + bool new_nodes = false; }; void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_for_type) @@ -972,23 +980,25 @@ void requestFromLogExecutor(std::shared_ptrtoString(), response.error, *expected_result) - // << std::endl; +#if 0 + if (*expected_result != response.error) + { + std::cerr << fmt::format( + "Unexpected result for {}\ngot {}, expected {}\n", request->toString(), response.error, *expected_result) + << std::endl; - // if (const auto * multi_response = dynamic_cast(&response)) - // { - // std::string subresponses; - // for (size_t i = 0; i < multi_response->responses.size(); ++i) - // { - // subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); - // } + if (const auto * multi_response = dynamic_cast(&response)) + { + std::string subresponses; + for (size_t i = 0; i < multi_response->responses.size(); ++i) + { + subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); + } - // std::cerr << "Subresponses\n" << subresponses << std::endl; - // } - //} + std::cerr << 
"Subresponses\n" << subresponses << std::endl; + } + } +#endif } request_promise->set_value(); @@ -1048,8 +1058,16 @@ void Runner::runBenchmarkFromLog() pool->wait(); - dumpStats("Write", stats.write_requests); - dumpStats("Read", stats.read_requests); + + if (setup_nodes_collector) + { + setup_nodes_collector->generateSnapshot(); + } + else + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + } }); auto push_request = [&](RequestFromLog request) From 39eef359dbc142c53d9f0162a36f0fee74e5edcc Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 22 May 2024 14:39:13 +0000 Subject: [PATCH 523/651] Add IInflatingTransform::getRemaining instead of flag in canGenerate --- src/Processors/IInflatingTransform.cpp | 21 ++++++++++------ src/Processors/IInflatingTransform.h | 7 +++--- .../Transforms/ArrayJoinTransform.cpp | 4 +-- .../Transforms/ArrayJoinTransform.h | 2 +- .../Transforms/SquashingChunksTransform.cpp | 25 +++++++++---------- .../Transforms/SquashingChunksTransform.h | 7 +++--- 6 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/Processors/IInflatingTransform.cpp b/src/Processors/IInflatingTransform.cpp index bc0b3e8459e..a59eda0feb2 100644 --- a/src/Processors/IInflatingTransform.cpp +++ b/src/Processors/IInflatingTransform.cpp @@ -76,17 +76,24 @@ void IInflatingTransform::work() current_chunk = generate(); generated = true; - can_generate = canGenerate(is_finished); + can_generate = canGenerate(); + } + else if (is_finished) + { + if (can_generate || generated || has_input) + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot finish work because it has generated data or has input data"); + + current_chunk = getRemaining(); + generated = !current_chunk.empty(); } else { - if (has_input) - { - consume(std::move(current_chunk)); - has_input = false; - } + if (!has_input) + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot consume chunk because it wasn't read"); - can_generate = canGenerate(is_finished); + consume(std::move(current_chunk)); + has_input = false; + can_generate = canGenerate(); } } diff --git a/src/Processors/IInflatingTransform.h b/src/Processors/IInflatingTransform.h index 3f832b0e5bc..0cb7fc06cc4 100644 --- a/src/Processors/IInflatingTransform.h +++ b/src/Processors/IInflatingTransform.h @@ -16,8 +16,8 @@ namespace DB /// ... (process transformed chunk) /// } /// } -/// while (transform.canGenerate(true)) -/// ... (process remaining data) +/// transformed_chunk = transform.getRemaining(); +/// ... 
(process remaining data) /// class IInflatingTransform : public IProcessor { @@ -31,8 +31,9 @@ protected: bool can_generate = false; virtual void consume(Chunk chunk) = 0; - virtual bool canGenerate(bool is_read_finished) = 0; + virtual bool canGenerate() = 0; virtual Chunk generate() = 0; + virtual Chunk getRemaining() { return {}; } public: IInflatingTransform(Block input_header, Block output_header); diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp index b7a6ba85963..1304434d74e 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.cpp +++ b/src/Processors/Transforms/ArrayJoinTransform.cpp @@ -38,14 +38,14 @@ void ArrayJoinTransform::consume(Chunk chunk) } -bool ArrayJoinTransform::canGenerate(bool) +bool ArrayJoinTransform::canGenerate() { return result_iterator && result_iterator->hasNext(); } Chunk ArrayJoinTransform::generate() { - if (!canGenerate(false)) + if (!canGenerate()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in ArrayJoinTransform"); auto block = result_iterator->next(); diff --git a/src/Processors/Transforms/ArrayJoinTransform.h b/src/Processors/Transforms/ArrayJoinTransform.h index de291a0422f..4219135982d 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.h +++ b/src/Processors/Transforms/ArrayJoinTransform.h @@ -26,7 +26,7 @@ public: protected: void consume(Chunk chunk) override; - bool canGenerate(bool is_read_finished) override; + bool canGenerate() override; Chunk generate() override; private: diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index b79987161fd..267490dc89e 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -62,29 +62,28 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::consume(Chunk chunk) { - current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } Chunk SimpleSquashingChunksTransform::generate() { - if (!current_block) + if (squashed_chunk.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - Chunk result(current_block.getColumns(), current_block.rows()); - current_block.clear(); - return result; + return std::move(squashed_chunk); } - -bool SimpleSquashingChunksTransform::canGenerate(bool is_read_finished) +bool SimpleSquashingChunksTransform::canGenerate() { - if (current_block) - return true; + return !squashed_chunk.empty(); +} - if (is_read_finished) - current_block = squashing.add({}); - - return bool(current_block); +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + return std::move(squashed_chunk); } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index d0316c39a43..8c30a6032e4 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -39,12 +39,13 @@ public: protected: void consume(Chunk chunk) override; - bool canGenerate(bool is_read_finished) override; + bool canGenerate() 
override; Chunk generate() override; + Chunk getRemaining() override; private: SquashingTransform squashing; - - Block current_block; + Chunk squashed_chunk; }; + } From 7e0e953ec9913435505d75285d1e5244c869a797 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 21 May 2024 17:01:16 +0000 Subject: [PATCH 524/651] Add debug logging to EmbeddedRocksDBBulkSink --- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp | 13 ++++++++----- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h | 2 +- src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 16 ++++++++-------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 7094578a9cc..0baa234e7a3 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -155,7 +155,7 @@ std::vector EmbeddedRocksDBBulkSink::squash(Chunk chunk) return {}; } -std::pair EmbeddedRocksDBBulkSink::serializeChunks(const std::vector & input_chunks) const +std::pair EmbeddedRocksDBBulkSink::serializeChunks(std::vector && input_chunks) const { auto serialized_key_column = ColumnString::create(); auto serialized_value_column = ColumnString::create(); @@ -168,7 +168,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali WriteBufferFromVector writer_key(serialized_key_data); WriteBufferFromVector writer_value(serialized_value_data); - for (const auto & chunk : input_chunks) + for (auto && chunk : input_chunks) { const auto & columns = chunk.getColumns(); auto rows = chunk.getNumRows(); @@ -193,13 +193,14 @@ std::pair EmbeddedRocksDBBulkSink::seriali void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) { - std::vector to_written = squash(std::move(chunk_)); + std::vector chunks_to_write = squash(std::move(chunk_)); - if (to_written.empty()) + if (chunks_to_write.empty()) return; - auto [serialized_key_column, serialized_value_column] = serializeChunks(to_written); + auto [serialized_key_column, serialized_value_column] = serializeChunks(std::move(chunks_to_write)); auto sst_file_path = getTemporarySSTFilePath(); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows to SST file {}", serialized_key_column->size(), sst_file_path); if (auto status = buildSSTFile(sst_file_path, *serialized_key_column, *serialized_value_column); !status.ok()) throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); @@ -209,6 +210,7 @@ void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) if (auto status = storage.rocksdb_ptr->IngestExternalFile({sst_file_path}, ingest_options); !status.ok()) throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "SST file {} has been ingested", sst_file_path); if (fs::exists(sst_file_path)) (void)fs::remove(sst_file_path); } @@ -237,4 +239,5 @@ bool EmbeddedRocksDBBulkSink::isEnoughSize(const Chunk & chunk) const { return chunk.getNumRows() >= min_block_size_rows; } + } diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 19ce1e3b83e..46193b152ca 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -49,7 +49,7 @@ private: bool isEnoughSize(const std::vector & input_chunks) const; bool isEnoughSize(const Chunk & chunk) const; /// Serialize chunks to rocksdb key-value pairs - std::pair serializeChunks(const std::vector & input_chunks) const; + std::pair 
serializeChunks(std::vector && input_chunks) const; StorageEmbeddedRocksDB & storage; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 1a9aa6d0f41..e00cea27c49 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -316,6 +316,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt void StorageEmbeddedRocksDB::drop() { + std::lock_guard lock(rocksdb_ptr_mx); rocksdb_ptr->Close(); rocksdb_ptr = nullptr; } @@ -463,18 +464,13 @@ void StorageEmbeddedRocksDB::initDB() { rocksdb::DB * db; if (read_only) - { status = rocksdb::DB::OpenForReadOnly(merged, rocksdb_dir, &db); - } else - { status = rocksdb::DB::Open(merged, rocksdb_dir, &db); - } + if (!status.ok()) - { - throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", - rocksdb_dir, status.ToString()); - } + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", rocksdb_dir, status.ToString()); + rocksdb_ptr = std::unique_ptr(db); } } @@ -589,8 +585,12 @@ SinkToStoragePtr StorageEmbeddedRocksDB::write( const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context, bool /*async_insert*/) { if (getSettings().optimize_for_bulk_insert) + { + LOG_DEBUG(getLogger("StorageEmbeddedRocksDB"), "Using bulk insert"); return std::make_shared(query_context, *this, metadata_snapshot); + } + LOG_DEBUG(getLogger("StorageEmbeddedRocksDB"), "Using regular insert"); return std::make_shared(*this, metadata_snapshot); } From 7314689712549c1c2bf528fc8ef7638a2eb77ddf Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 22 May 2024 11:04:17 +0000 Subject: [PATCH 525/651] Store logger in StorageEmbeddedRocksDB --- src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 5 +++-- src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index e00cea27c49..c3b7ae64c7e 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -189,6 +189,7 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, , rocksdb_dir(std::move(rocksdb_dir_)) , ttl(ttl_) , read_only(read_only_) + , log(getLogger(fmt::format("StorageEmbeddedRocksDB ({})", getStorageID().getNameForLogs()))) { setInMemoryMetadata(metadata_); setSettings(std::move(settings_)); @@ -586,11 +587,11 @@ SinkToStoragePtr StorageEmbeddedRocksDB::write( { if (getSettings().optimize_for_bulk_insert) { - LOG_DEBUG(getLogger("StorageEmbeddedRocksDB"), "Using bulk insert"); + LOG_DEBUG(log, "Using bulk insert"); return std::make_shared(query_context, *this, metadata_snapshot); } - LOG_DEBUG(getLogger("StorageEmbeddedRocksDB"), "Using regular insert"); + LOG_DEBUG(log, "Using regular insert"); return std::make_shared(*this, metadata_snapshot); } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 9fc58ea6b38..61592398954 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -124,5 +124,7 @@ private: bool read_only; void initDB(); + + LoggerPtr log; }; } From 6f4a8bf2ea5bff2afd619f1bad8b034b325bcbfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 22 May 2024 17:32:01 +0200 
Subject: [PATCH 526/651] Simplify test --- .../03033_final_undefined_last_mark.reference | 4 ++-- .../03033_final_undefined_last_mark.sql | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference index bf0a25f24e4..a30b755709b 100644 --- a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference @@ -1,2 +1,2 @@ -GOOD 11338881281426660955 14765404159170880511 -GOOD 11338881281426660955 14765404159170880511 +Disabled 11338881281426660955 14765404159170880511 +Enabled 11338881281426660955 14765404159170880511 diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql index 2c13da42ca4..25a30a365a5 100644 --- a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql @@ -1,23 +1,23 @@ -- Tags: no-random-settings, no-random-merge-tree-settings +DROP TABLE IF EXISTS account_test; + CREATE TABLE account_test ( `id` UInt64, `row_ver` UInt64, ) ENGINE = ReplacingMergeTree(row_ver) -PARTITION BY id % 64 ORDER BY id -SETTINGS index_granularity = 512, index_granularity_bytes = 0, +SETTINGS index_granularity = 16, index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, min_rows_for_compact_part = 0, min_bytes_for_compact_part = 0; -INSERT INTO account_test - SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 50000; +SYSTEM STOP MERGES account_test; -INSERT INTO account_test - SELECT * FROM (SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 1000) WHERE row_ver > 14098131981223776000; +INSERT INTO account_test VALUES (11338881281426660955,717769962224129342),(12484100559155738267,7950971667203174918),(7603729260199571867,3255798127676911942),(7023543111808724827,911615979861855126),(10293135086416484571,3264379259750736572),(15561193439904316763,8419819469587131454),(17632407413882870235,7252071832370181502),(17009726455991851227,7525297506591593939),(12392078953873778779,8473049173389293961),(15283366022689446555,11692491360262171467),(9087459014730986523,2783662960221838603),(293823584550906267,4847630088179732782),(15693186194430465755,8163804880526285623),(7353080168325584795,17315892478487497859),(5980311238303466523,6943353798059390089),(14242621660019578011,8684624667957352769),(8241843507567433563,15731952080102886438); +INSERT INTO account_test VALUES (11338881281426660955, 14765404159170880511); -SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 0; -SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; +SELECT 'Disabled', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 0; +SELECT 'Enabled', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; From 48cab9e9dbeb16d1be33bdcce9206c472445cd9f Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 22 May 2024 15:53:32 +0000 Subject: [PATCH 527/651] Fix tests --- src/Columns/ColumnDynamic.cpp | 6 +++--- src/Columns/ColumnDynamic.h | 6 
+++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index d63a03dbafd..3c147b6f123 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include namespace DB { @@ -662,8 +662,8 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source all_variants.push_back(source_variants[i]); it = total_sizes.emplace(variant_name, 0).first; } - - size_t size = source_statistics.data.empty() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : source_statistics.data.at(variant_name); + auto statistics_it = source_statistics.data.find(variant_name); + size_t size = statistics_it == source_statistics.data.end() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : statistics_it->second; it->second += size; } } diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 8aece765308..27ad0dd583f 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -96,13 +96,13 @@ public: MutableColumnPtr cloneEmpty() const override { - /// Keep current dynamic structure but not statistics. - return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types); + /// Keep current dynamic structure + return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics); } MutableColumnPtr cloneResized(size_t size) const override { - return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types); + return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics); } size_t size() const override From 332f449a0cec30616180266d4a43a4e658794b1f Mon Sep 17 00:00:00 2001 From: Danila Puzov Date: Wed, 22 May 2024 18:59:39 +0300 Subject: [PATCH 528/651] Issues --- src/Functions/generateSnowflakeID.cpp | 272 +++++++++++------- src/Functions/serial.cpp | 67 +++-- .../03129_serial_test_zookeeper.sql | 16 +- .../03130_generateSnowflakeId.reference | 11 + .../0_stateless/03130_generateSnowflakeId.sql | 29 ++ .../03130_generate_snowflake_id.reference | 3 - .../03130_generate_snowflake_id.sql | 11 - 7 files changed, 252 insertions(+), 157 deletions(-) create mode 100644 tests/queries/0_stateless/03130_generateSnowflakeId.reference create mode 100644 tests/queries/0_stateless/03130_generateSnowflakeId.sql delete mode 100644 tests/queries/0_stateless/03130_generate_snowflake_id.reference delete mode 100644 tests/queries/0_stateless/03130_generate_snowflake_id.sql diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index d70b8349cd8..6ae5dc13af0 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -5,6 +5,7 @@ #include #include #include +#include "base/types.h" namespace DB @@ -34,43 +35,153 @@ namespace - The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes */ +/// bit counts constexpr auto timestamp_bits_count = 41; constexpr auto machine_id_bits_count = 10; constexpr auto machine_seq_num_bits_count = 12; -constexpr int64_t timestamp_mask = ((1LL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); -constexpr int64_t machine_id_mask = ((1LL << machine_id_bits_count) - 1) << machine_seq_num_bits_count; -constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_bits_count) - 1; -constexpr int64_t 
max_machine_seq_num = machine_seq_num_mask;
+/// bits masks for Snowflake ID components
+// constexpr uint64_t timestamp_mask = ((1ULL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); // unused
+constexpr uint64_t machine_id_mask = ((1ULL << machine_id_bits_count) - 1) << machine_seq_num_bits_count;
+constexpr uint64_t machine_seq_num_mask = (1ULL << machine_seq_num_bits_count) - 1;
 
-Int64 getMachineID()
+/// max values
+constexpr uint64_t max_machine_seq_num = machine_seq_num_mask;
+
+uint64_t getMachineID()
 {
     UUID server_uuid = ServerUUID::get();
     /// hash into 64 bits
-    UInt64 hi = UUIDHelpers::getHighBytes(server_uuid);
-    UInt64 lo = UUIDHelpers::getLowBytes(server_uuid);
-    return ((hi * 11) ^ (lo * 17)) & machine_id_mask;
+    uint64_t hi = UUIDHelpers::getHighBytes(server_uuid);
+    uint64_t lo = UUIDHelpers::getLowBytes(server_uuid);
+    /// return only 10 bits
+    return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count;
 }
 
-Int64 getTimestamp()
+uint64_t getTimestamp()
 {
     auto now = std::chrono::system_clock::now();
     auto ticks_since_epoch = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
-    return ticks_since_epoch & ((1LL << timestamp_bits_count) - 1);
+    return static_cast<uint64_t>(ticks_since_epoch) & ((1ULL << timestamp_bits_count) - 1);
 }
 
+struct SnowflakeComponents {
+    uint64_t timestamp;
+    uint64_t machine_id;
+    uint64_t machine_seq_num;
+};
+
+SnowflakeComponents toComponents(uint64_t snowflake) {
+    return {
+        .timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)),
+        .machine_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count),
+        .machine_seq_num = (snowflake & machine_seq_num_mask)
+    };
 }
 
-class FunctionSnowflakeID : public IFunction
+uint64_t toSnowflakeID(SnowflakeComponents components) {
+    return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) |
+            components.machine_id << (machine_seq_num_bits_count) |
+            components.machine_seq_num);
+}
+
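// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the patch): how the two helpers
// above compose. The concrete values are hypothetical.
//
//     SnowflakeComponents c{.timestamp = 1700000000000, .machine_id = 677, .machine_seq_num = 7};
//     uint64_t id = toSnowflakeID(c);           // (timestamp << 22) | (machine_id << 12) | seq_num
//     SnowflakeComponents back = toComponents(id);
//     // back equals c field by field: the round trip is lossless as long as
//     // each field fits its bit width (41, 10 and 12 bits respectively).
// ---------------------------------------------------------------------------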
+struct RangeOfSnowflakeIDs {
+    /// [begin, end)
+    SnowflakeComponents begin, end;
+};
+
+/* Get range of `input_rows_count` Snowflake IDs from `max(available, now)`
+
+1. Calculate Snowflake ID by current timestamp (`now`)
+2. `begin = max(available, now)`
+3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow
+*/
+RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, size_t input_rows_count)
 {
-private:
-    mutable std::atomic<Int64> lowest_available_snowflake_id = 0; /// atomic to avoid a mutex
+    /// 1. `now`
+    SnowflakeComponents begin = {
+        .timestamp = getTimestamp(),
+        .machine_id = getMachineID(),
+        .machine_seq_num = 0
+    };
 
-public:
+    /// 2. `begin`
+    if (begin.timestamp <= available.timestamp)
+    {
+        begin.timestamp = available.timestamp;
+        begin.machine_seq_num = available.machine_seq_num;
+    }
+
+    /// 3. `end = begin + input_rows_count`
+    SnowflakeComponents end;
+    const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1);
+    if (input_rows_count >= seq_nums_in_current_timestamp_left)
+        /// if the sequence numbers left in the current timestamp are not enough for the rows => update the timestamp
+        end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1);
+    else
+        end.timestamp = begin.timestamp;
+
+    end.machine_id = begin.machine_id;
+    end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask;
+
+    return {begin, end};
+}
+
+struct GlobalCounterPolicy
+{
+    static constexpr auto name = "generateSnowflakeID";
+    static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 bits plus a top zero bit), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
 
+    /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously.
+    struct Data
+    {
+        static inline std::atomic<uint64_t> lowest_available_snowflake_id = 0;
+
+        SnowflakeComponents reserveRange(size_t input_rows_count)
+        {
+            uint64_t available_snowflake_id = lowest_available_snowflake_id.load();
+            RangeOfSnowflakeIDs range;
+            do
+            {
+                range = getRangeOfAvailableIDs(toComponents(available_snowflake_id), input_rows_count);
+            }
+            while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, toSnowflakeID(range.end)));
+            /// if `compare_exchange` failed => another thread updated `lowest_available_snowflake_id` and we should try again
+            /// completed => range of IDs [begin, end) is reserved, can return the beginning of the range
+
+            return range.begin;
+        }
+    };
+};
+
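// ---------------------------------------------------------------------------
// Illustrative trace (editorial, not part of the patch): behaviour of the
// lock-free reservation above under contention, with hypothetical numbers.
// Suppose two threads call reserveRange(4096) while the shared counter encodes
// (timestamp = T, seq = 0). Both compute the range [T/0, T+1/0), but only one
// compare_exchange_weak succeeds; the loser reloads the updated counter and
// computes the next range [T+1/0, T+2/0):
//
//     GlobalCounterPolicy::Data data;
//     SnowflakeComponents first = data.reserveRange(4096);   // thread A gets [T/0, T+1/0)
//     SnowflakeComponents second = data.reserveRange(4096);  // thread B retries, gets [T+1/0, T+2/0)
//
// Each caller therefore walks away with 4096 IDs no other thread can hand out.
// ---------------------------------------------------------------------------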
+struct ThreadLocalCounterPolicy
+{
+    static constexpr auto name = "generateSnowflakeIDThreadMonotonic";
+    static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 bits plus a top zero bit), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotonicity across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)";
+
+    /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads.
+    struct Data
+    {
+        static inline thread_local uint64_t lowest_available_snowflake_id = 0;
+
+        SnowflakeComponents reserveRange(size_t input_rows_count)
+        {
+            RangeOfSnowflakeIDs range = getRangeOfAvailableIDs(toComponents(lowest_available_snowflake_id), input_rows_count);
+            lowest_available_snowflake_id = toSnowflakeID(range.end);
+            return range.begin;
+        }
+    };
+};
+
+}
+
+template <typename FillPolicy>
+class FunctionGenerateSnowflakeID : public IFunction, public FillPolicy
+{
+public:
-    static constexpr auto name = "generateSnowflakeID";
-    static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionSnowflakeID>(); }
+    static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionGenerateSnowflakeID>(); }
 
-    String getName() const override { return name; }
+    String getName() const override { return FillPolicy::name; }
     size_t getNumberOfArguments() const override { return 0; }
     bool isDeterministic() const override { return false; }
     bool isDeterministicInScopeOfQuery() const override { return false; }
@@ -80,71 +191,36 @@ public:
 
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        if (!arguments.empty()) {
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                "Number of arguments for function {} doesn't match: passed {}, should be 0.",
-                getName(), arguments.size());
-        }
-        return std::make_shared<DataTypeInt64>();
+        FunctionArgumentDescriptors mandatory_args;
+        FunctionArgumentDescriptors optional_args{
+            {"expr", nullptr, nullptr, "Arbitrary Expression"}
+        };
+        validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
+
+        return std::make_shared<DataTypeUInt64>();
    }
 
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override
    {
-        auto col_res = ColumnVector<Int64>::create();
-        typename ColumnVector<Int64>::Container & vec_to = col_res->getData();
+        auto col_res = ColumnVector<UInt64>::create();
+        typename ColumnVector<UInt64>::Container & vec_to = col_res->getData();
 
         vec_to.resize(input_rows_count);
 
-        if (input_rows_count == 0) {
-            return col_res;
-        }
-
-        const Int64 machine_id = getMachineID();
-        Int64 current_timestamp = getTimestamp();
-        Int64 current_machine_seq_num;
-
-        Int64 available_snowflake_id, next_available_snowflake_id;
-
-        const Int64 input_rows_count_signed = static_cast<Int64>(input_rows_count);
-
-        do
+        if (input_rows_count != 0)
         {
-            available_snowflake_id = lowest_available_snowflake_id.load();
-            const Int64 available_timestamp = (available_snowflake_id & timestamp_mask) >> (machine_id_bits_count + machine_seq_num_bits_count);
-            const Int64 available_machine_seq_num = available_snowflake_id & machine_seq_num_mask;
+            typename FillPolicy::Data data;
+            /// get the beginning of the available Snowflake ID range
+            SnowflakeComponents snowflake_id = data.reserveRange(input_rows_count);
 
-            if (current_timestamp > available_timestamp)
+            for (UInt64 & to_row : vec_to)
             {
-                /// handle overflow
-                current_machine_seq_num = 0;
-            }
-            else
-            {
-                current_timestamp = available_timestamp;
-                current_machine_seq_num = available_machine_seq_num;
-            }
-
-            /// calculate new lowest_available_snowflake_id
-            const Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1);
-            Int64 new_timestamp;
-            if (input_rows_count_signed >= seq_nums_in_current_timestamp_left)
-                new_timestamp = current_timestamp + 1 + (input_rows_count_signed - seq_nums_in_current_timestamp_left) / max_machine_seq_num;
-            else
-                new_timestamp = current_timestamp;
-            const Int64 new_machine_seq_num = (current_machine_seq_num + input_rows_count_signed) & machine_seq_num_mask;
-            next_available_snowflake_id = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_machine_seq_num;
-        }
-        while (!lowest_available_snowflake_id.compare_exchange_strong(available_snowflake_id, next_available_snowflake_id));
-        /// failed CAS => another thread updated `lowest_available_snowflake_id`
-        /// successful CAS => we have our range of exclusive values
-
-        for (Int64 & to_row : vec_to)
-        {
-            to_row = (current_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | current_machine_seq_num;
-            if (current_machine_seq_num++ == max_machine_seq_num)
-            {
-                current_machine_seq_num = 0;
-                ++current_timestamp;
+                to_row = toSnowflakeID(snowflake_id);
+                if (snowflake_id.machine_seq_num++ == max_machine_seq_num)
+                {
+                    snowflake_id.machine_seq_num = 0;
+                    ++snowflake_id.timestamp;
+                }
             }
         }
 
@@ -153,43 +229,27 @@ public:
 };
 
+template<typename FillPolicy>
+void registerSnowflakeIDGenerator(auto& factory)
+{
+    static constexpr auto doc_syntax_format = "{}([expression])";
+    static constexpr auto example_format = "SELECT {}()";
+    static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)";
+
+    FunctionDocumentation::Description doc_description = FillPolicy::doc_description;
+    FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name);
+    FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
+    FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UInt64";
+    FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
+    FunctionDocumentation::Categories doc_categories = {"Snowflake ID"};
+
+    factory.template registerFunction<FunctionGenerateSnowflakeID<FillPolicy>>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive);
+}
+
 REGISTER_FUNCTION(GenerateSnowflakeID)
 {
-    factory.registerFunction<FunctionSnowflakeID>(FunctionDocumentation
-    {
-        .description=R"(
-Generates a SnowflakeID -- unique identificators contains:
-- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970)
-- The middle 10 bits are the machine ID
-- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes
-
-In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
-This function guarantees strict monotony on 1 machine and differences in values obtained on different machines.
-)", - .syntax = "generateSnowflakeID()", - .arguments{}, - .returned_value = "Column of Int64", - .examples{ - {"single call", "SELECT generateSnowflakeID();", R"( -┌─generateSnowflakeID()─┐ -│ 7195510166884597760 │ -└───────────────────────┘)"}, - {"column call", "SELECT generateSnowflakeID() FROM numbers(10);", R"( -┌─generateSnowflakeID()─┐ -│ 7195516038159417344 │ -│ 7195516038159417345 │ -│ 7195516038159417346 │ -│ 7195516038159417347 │ -│ 7195516038159417348 │ -│ 7195516038159417349 │ -│ 7195516038159417350 │ -│ 7195516038159417351 │ -│ 7195516038159417352 │ -│ 7195516038159417353 │ -└───────────────────────┘)"}, - }, - .categories{"Unique identifiers", "Snowflake ID"} - }); + registerSnowflakeIDGenerator(factory); + registerSnowflakeIDGenerator(factory); } } diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp index de3036ad242..d65df83c9f9 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/serial.cpp @@ -1,9 +1,12 @@ +#include "Common/Exception.h" #include #include #include #include +#include #include + namespace DB { @@ -14,6 +17,9 @@ namespace ErrorCodes extern const int KEEPER_EXCEPTION; } +constexpr auto function_node_name = "/serial_ids/"; +constexpr size_t MAX_SERIES_NUMBER = 1000; // ? + class FunctionSerial : public IFunction { private: @@ -21,7 +27,7 @@ private: ContextPtr context; public: - static constexpr auto name = "serial"; + static constexpr auto name = "generateSerialID"; explicit FunctionSerial(ContextPtr context_) : context(context_) { @@ -48,16 +54,12 @@ public: bool hasInformationAboutMonotonicity() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1.", - getName(), arguments.size()); - if (!isStringOrFixedString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Type of argument for function {} doesn't match: passed {}, should be string", - getName(), arguments[0]->getName()); + FunctionArgumentDescriptors mandatory_args{ + {"series identifier", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} + }; + validateFunctionArgumentTypes(*this, arguments, mandatory_args); return std::make_shared(); } @@ -71,12 +73,19 @@ public: if (zk->expired()) zk = context->getZooKeeper(); + // slow? + if (zk->exists(function_node_name) && zk->getChildren(function_node_name).size() == MAX_SERIES_NUMBER) { + throw Exception(ErrorCodes::KEEPER_EXCEPTION, + "At most {} serial nodes can be created", + MAX_SERIES_NUMBER); + } + auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); vec_to.resize(input_rows_count); - const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); + const auto & serial_path = function_node_name + arguments[0].column->getDataAt(0).toString(); /// CAS in ZooKeeper /// `get` value and version, `trySet` new with version check @@ -130,28 +139,28 @@ Generates and returns sequential numbers starting from the previous counter valu This function takes a constant string argument - a series identifier. The server should be configured with a ZooKeeper. 
)", - .syntax = "serial(identifier)", + .syntax = "generateSerialID(identifier)", .arguments{ - {"series identifier", "Series identifier (String)"} + {"series identifier", "Series identifier (String or FixedString)"} }, .returned_value = "Sequential numbers of type Int64 starting from the previous counter value", .examples{ - {"first call", "SELECT serial('id1')", R"( -┌─serial('id1')──┐ -│ 1 │ -└────────────────┘)"}, - {"second call", "SELECT serial('id1')", R"( -┌─serial('id1')──┐ -│ 2 │ -└────────────────┘)"}, - {"column call", "SELECT *, serial('id1') FROM test_table", R"( -┌─CounterID─┬─UserID─┬─ver─┬─serial('id1')──┐ -│ 1 │ 3 │ 3 │ 3 │ -│ 1 │ 1 │ 1 │ 4 │ -│ 1 │ 2 │ 2 │ 5 │ -│ 1 │ 5 │ 5 │ 6 │ -│ 1 │ 4 │ 4 │ 7 │ -└───────────┴────────┴─────┴────────────────┘ + {"first call", "SELECT generateSerialID('id1')", R"( +┌─generateSerialID('id1')──┐ +│ 1 │ +└──────────────────────────┘)"}, + {"second call", "SELECT generateSerialID('id1')", R"( +┌─generateSerialID('id1')──┐ +│ 2 │ +└──────────────────────────┘)"}, + {"column call", "SELECT *, generateSerialID('id1') FROM test_table", R"( +┌─CounterID─┬─UserID─┬─ver─┬─generateSerialID('id1')──┐ +│ 1 │ 3 │ 3 │ 3 │ +│ 1 │ 1 │ 1 │ 4 │ +│ 1 │ 2 │ 2 │ 5 │ +│ 1 │ 5 │ 5 │ 6 │ +│ 1 │ 4 │ 4 │ 7 │ +└───────────┴────────┴─────┴──────────────────────────┘ )"}}, .categories{"Unique identifiers"} }); diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql index c3395009477..2bd60656259 100644 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql @@ -1,12 +1,12 @@ -- Tags: zookeeper -SELECT serial('x'); -SELECT serial('x'); -SELECT serial('y'); -SELECT serial('x') FROM numbers(5); +SELECT generateSerialID('x'); +SELECT generateSerialID('x'); +SELECT generateSerialID('y'); +SELECT generateSerialID('x') FROM numbers(5); -SELECT serial(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT serial('x', 'y'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT serial(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT generateSerialID(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT generateSerialID('x', 'y'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT generateSerialID(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT serial('z'), serial('z') FROM numbers(5); +SELECT generateSerialID('z'), generateSerialID('z') FROM numbers(5); diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference new file mode 100644 index 00000000000..8cdced96770 --- /dev/null +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -0,0 +1,11 @@ +-- generateSnowflakeID -- +1 +1 +0 +0 +1 +100 +-- generateSnowflakeIDThreadMonotonic -- +1 +1 +100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql new file mode 100644 index 00000000000..3e994149d2b --- /dev/null +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -0,0 +1,29 @@ +SELECT '-- generateSnowflakeID --'; +SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; -- check machine sequence number is zero +SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero + +SELECT generateSnowflakeID(1) = generateSnowflakeID(2); +SELECT generateSnowflakeID() = generateSnowflakeID(1); +SELECT generateSnowflakeID(1) = 
generateSnowflakeID(1);
+
+SELECT generateSnowflakeID(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+
+SELECT count(*)
+FROM
+(
+    SELECT DISTINCT generateSnowflakeID()
+    FROM numbers(100)
+);
+
+SELECT '-- generateSnowflakeIDThreadMonotonic --';
+SELECT bitShiftLeft(toUInt64(generateSnowflakeIDThreadMonotonic()), 52) = 0; -- check machine sequence number is zero
+SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeIDThreadMonotonic()), 63), 1) = 0; -- check first bit is zero
+
+SELECT generateSnowflakeIDThreadMonotonic(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+
+SELECT count(*)
+FROM
+(
+    SELECT DISTINCT generateSnowflakeIDThreadMonotonic()
+    FROM numbers(100)
+);
\ No newline at end of file
diff --git a/tests/queries/0_stateless/03130_generate_snowflake_id.reference b/tests/queries/0_stateless/03130_generate_snowflake_id.reference
deleted file mode 100644
index 2049ba26379..00000000000
--- a/tests/queries/0_stateless/03130_generate_snowflake_id.reference
+++ /dev/null
@@ -1,3 +0,0 @@
-1
-1
-10
diff --git a/tests/queries/0_stateless/03130_generate_snowflake_id.sql b/tests/queries/0_stateless/03130_generate_snowflake_id.sql
deleted file mode 100644
index 669814c9ecb..00000000000
--- a/tests/queries/0_stateless/03130_generate_snowflake_id.sql
+++ /dev/null
@@ -1,11 +0,0 @@
-SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0;
-SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0;
-
-SELECT generateSnowflakeID(1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-
-SELECT count(*)
-FROM
-(
-    SELECT DISTINCT generateSnowflakeID()
-    FROM numbers(10)
-)
\ No newline at end of file

From b6aa841e575a6594d159be2cc2a5fbc1391190ce Mon Sep 17 00:00:00 2001
From: Danila Puzov
Date: Wed, 22 May 2024 19:26:48 +0300
Subject: [PATCH 529/651] Docs for generateSnowflakeID

---
 .../sql-reference/functions/uuid-functions.md | 126 ++++++++++++++++++
 src/Functions/generateSnowflakeID.cpp         |   2 +-
 2 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md
index d1b833c2439..80d7215b9ef 100644
--- a/docs/en/sql-reference/functions/uuid-functions.md
+++ b/docs/en/sql-reference/functions/uuid-functions.md
@@ -690,6 +690,132 @@ serverUUID()
 
 Type: [UUID](../data-types/uuid.md).
 
+## generateSnowflakeID
+
+Generates a [Snowflake ID](https://github.com/twitter-archive/snowflake/tree/b3f6a3c6ca8e1b6847baa6ff42bf72201e2c2231).
+
+The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 bits plus a top zero bit), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond.
+For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
+In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
+
+Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.
+
+```
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+|0|                         timestamp                           |
+├─┼                 ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+|                   |     machine_id    |    machine_seq_num    |
+└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
+```
+
+**Syntax**
+
+``` sql
+generateSnowflakeID([expr])
+```
+
+**Arguments**
+
+- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
+
+**Returned value**
+
+A value of type UInt64.
+
+**Example**
+
+First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table.
+
+``` sql
+CREATE TABLE tab (id UInt64) ENGINE = Memory;
+
+INSERT INTO tab SELECT generateSnowflakeID();
+
+SELECT * FROM tab;
+```
+
+Result:
+
+```response
+┌──────────────────id─┐
+│ 7199081390080409600 │
+└─────────────────────┘
+```
+
+**Example with multiple Snowflake IDs generated per row**
+
+```sql
+SELECT generateSnowflakeID(1), generateSnowflakeID(2);
+
+┌─generateSnowflakeID(1)─┬─generateSnowflakeID(2)─┐
+│    7199081609652224000 │    7199081609652224001 │
+└────────────────────────┴────────────────────────┘
+```
+
+## generateSnowflakeIDThreadMonotonic
+
+Generates a [Snowflake ID](https://github.com/twitter-archive/snowflake/tree/b3f6a3c6ca8e1b6847baa6ff42bf72201e2c2231).
+
+The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 bits plus a top zero bit), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond.
+
+This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotonicity across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.
+
+```
+ 0                   1                   2                   3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+|0|                         timestamp                           |
+├─┼                 ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
+|                   |     machine_id    |    machine_seq_num    |
+└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
+```
+
+**Syntax**
+
+``` sql
+generateSnowflakeIDThreadMonotonic([expr])
+```
+
+**Arguments**
+
+- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
+
+**Returned value**
+
+A value of type UInt64.
+
+**Example**
+
+First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table.
+ +``` sql +CREATE TABLE tab (id UInt64) ENGINE = Memory; + +INSERT INTO tab SELECT generateSnowflakeIDThreadMonotonic(); + +SELECT * FROM tab; +``` + +Result: + +```response +┌──────────────────id─┐ +│ 7199082832006627328 │ +└─────────────────────┘ +``` + +**Example with multiple Snowflake IDs generated per row** + +```sql +SELECT generateSnowflakeIDThreadMonotonic(1), generateSnowflakeIDThreadMonotonic(2); + +┌─generateSnowflakeIDThreadMonotonic(1)─┬─generateSnowflakeIDThreadMonotonic(2)─┐ +│ 7199082940311945216 │ 7199082940316139520 │ +└───────────────────────────────────────┴───────────────────────────────────────┘ +``` + ## See also - [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 6ae5dc13af0..1b26bf44adb 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -28,7 +28,7 @@ namespace |0| timestamp | ├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ | | machine_id | machine_seq_num | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ - The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) - The middle 10 bits are the machine ID From a73d60bae5b49bf6b09e4acc05f59cecd528a007 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 22 May 2024 18:35:28 +0200 Subject: [PATCH 530/651] tests for qps_limit_exceeded --- contrib/aws | 2 +- .../integration/helpers/s3_mocks/broken_s3.py | 40 +++- .../test_checking_s3_blobs_paranoid/test.py | 206 +++++++++--------- 3 files changed, 143 insertions(+), 105 deletions(-) diff --git a/contrib/aws b/contrib/aws index 2e12d7c6daf..b7ae6e5bf48 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 2e12d7c6dafa81311ee3d73ac6a178550ffa75be +Subproject commit b7ae6e5bf48fb4981f24476bdd187cd35df1e2c6 diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 206f960293f..238b8aac112 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -165,11 +165,35 @@ class _ServerRuntime: '' "" "ExpectedError" - "mock s3 injected error" + "mock s3 injected unretryable error" "txfbd566d03042474888193-00608d7537" "" ) - request_handler.write_error(data) + request_handler.write_error(500, data) + + class SlowDownAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "SlowDown" + "Slow Down." + "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) + + class QpsLimitExceededAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "QpsLimitExceeded" + "Please reduce your request rate." 
+ "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) class RedirectAction: def __init__(self, host="localhost", port=1): @@ -239,6 +263,10 @@ class _ServerRuntime: self.error_handler = _ServerRuntime.BrokenPipeAction() elif self.action == "redirect_to": self.error_handler = _ServerRuntime.RedirectAction(*self.action_args) + elif self.action == "slow_down": + self.error_handler = _ServerRuntime.SlowDownAction(*self.action_args) + elif self.action == "qps_limit_exceeded": + self.error_handler = _ServerRuntime.QpsLimitExceededAction(*self.action_args) else: self.error_handler = _ServerRuntime.Expected500ErrorAction() @@ -344,12 +372,12 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.end_headers() self.wfile.write(b"Redirected") - def write_error(self, data, content_length=None): + def write_error(self, http_code, data, content_length=None): if content_length is None: content_length = len(data) self.log_message("write_error %s", data) self.read_all_input() - self.send_response(500) + self.send_response(http_code) self.send_header("Content-Type", "text/xml") self.send_header("Content-Length", str(content_length)) self.end_headers() @@ -418,7 +446,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): path = [x for x in parts.path.split("/") if x] assert path[0] == "mock_settings", path if len(path) < 2: - return self.write_error("_mock_settings: wrong command") + return self.write_error(400, "_mock_settings: wrong command") if path[1] == "at_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) @@ -477,7 +505,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.log_message("reset") return self._ok() - return self.write_error("_mock_settings: wrong command") + return self.write_error(400, "_mock_settings: wrong command") def do_GET(self): if self.path == "/": diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 22d6d263d23..97fc5de65e7 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -91,7 +91,7 @@ def get_multipart_counters(node, query_id, log_type="ExceptionWhileProcessing"): SELECT ProfileEvents['S3CreateMultipartUpload'], ProfileEvents['S3UploadPart'], - ProfileEvents['S3WriteRequestsErrors'], + ProfileEvents['S3WriteRequestsErrors'] + ProfileEvents['S3WriteRequestsThrottling'], FROM system.query_log WHERE query_id='{query_id}' AND type='{log_type}' @@ -148,7 +148,7 @@ def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression ) assert "Code: 499" in error, error - assert "mock s3 injected error" in error, error + assert "mock s3 injected unretryable error" in error, error create_multipart, upload_parts, s3_errors = get_multipart_counters( node, insert_query_id @@ -190,7 +190,7 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( ) assert "Code: 499" in error, error - assert "mock s3 injected error" in error, error + assert "mock s3 injected unretryable error" in error, error create_multipart, upload_parts, s3_errors = get_multipart_counters( node, insert_query_id @@ -200,18 +200,28 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( assert s3_errors >= 2 -def test_when_s3_connection_refused_is_retried(cluster, broken_s3): +@pytest.mark.parametrize( + "action_and_message", [ + ("slow_down", "DB::Exception: Slow Down."), + ("qps_limit_exceeded", "DB::Exception: Please reduce 
your request rate."), + ("connection_refused", "Poco::Exception. Code: 1000, e.code() = 111, Connection refused"), + ], + ids=lambda x: x[0] +) +def test_when_error_is_retried(cluster, broken_s3, action_and_message): node = cluster.instances["node"] - broken_s3.setup_fake_multpartuploads() - broken_s3.setup_at_part_upload(count=3, after=2, action="connection_refused") + action, message = action_and_message - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED" + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload(count=3, after=2, action=action) + + insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED" node.query( f""" INSERT INTO TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'http://resolver:8083/root/data/test_when_{action}_retried', 'minio', 'minio123', 'CSV', auto, 'none' ) @@ -234,13 +244,13 @@ def test_when_s3_connection_refused_is_retried(cluster, broken_s3): assert upload_parts == 39 assert s3_errors == 3 - broken_s3.setup_at_part_upload(count=1000, after=2, action="connection_refused") - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED_1" + broken_s3.setup_at_part_upload(count=1000, after=2, action=action) + insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED_1" error = node.query_and_get_error( f""" INSERT INTO TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'http://resolver:8083/root/data/test_when_{action}_retried', 'minio', 'minio123', 'CSV', auto, 'none' ) @@ -258,7 +268,79 @@ def test_when_s3_connection_refused_is_retried(cluster, broken_s3): assert "Code: 499" in error, error assert ( - "Poco::Exception. Code: 1000, e.code() = 111, Connection refused" in error + message in error + ), error + + +def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="broken_pipe", + ) + + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + create_multipart, upload_parts, s3_errors = get_multipart_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert create_multipart == 1 + assert upload_parts == 7 + assert s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="broken_pipe", + ) + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 32, I/O error: Broken pipe" + in error ), error @@ -401,20 +483,20 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ) error = node.query_and_get_error( f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=100, - s3_check_objects_after_upload=0 + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 """, query_id=insert_query_id, ) @@ -427,78 +509,6 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ), error -def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): - node = cluster.instances["node"] - - broken_s3.setup_fake_multpartuploads() - broken_s3.setup_at_part_upload( - count=3, - after=2, - action="broken_pipe", - ) - - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" - node.query( - f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, - s3_check_objects_after_upload=0 - """, - query_id=insert_query_id, - ) - - create_multipart, upload_parts, s3_errors = get_multipart_counters( - node, insert_query_id, log_type="QueryFinish" - ) - - assert create_multipart == 1 - assert upload_parts == 7 - assert s3_errors == 3 - - broken_s3.setup_at_part_upload( - count=1000, - after=2, - action="broken_pipe", - ) - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" - error = node.query_and_get_error( - f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, - s3_check_objects_after_upload=0 - """, - query_id=insert_query_id, - ) - - assert "Code: 1000" in error, error - assert ( - "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 32, I/O error: Broken pipe" - in error - ), error - - def test_query_is_canceled_with_inf_retries(cluster, broken_s3): node = cluster.instances["node_with_inf_s3_retries"] From 52fe1fab97a5f39c99c33deb1054bf319fbbf230 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 22 May 2024 16:46:02 +0000 Subject: [PATCH 531/651] Automatic style fix --- tests/integration/helpers/s3_mocks/broken_s3.py | 4 +++- .../test_checking_s3_blobs_paranoid/test.py | 14 ++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 238b8aac112..7d0127bc1c4 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -266,7 +266,9 @@ class _ServerRuntime: elif self.action == "slow_down": self.error_handler = _ServerRuntime.SlowDownAction(*self.action_args) elif self.action == "qps_limit_exceeded": - self.error_handler = _ServerRuntime.QpsLimitExceededAction(*self.action_args) + self.error_handler = _ServerRuntime.QpsLimitExceededAction( + *self.action_args + ) else: self.error_handler = _ServerRuntime.Expected500ErrorAction() diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 97fc5de65e7..a7fe02b16de 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -201,12 +201,16 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( @pytest.mark.parametrize( - "action_and_message", [ + "action_and_message", + [ ("slow_down", "DB::Exception: Slow Down."), ("qps_limit_exceeded", "DB::Exception: Please reduce your request rate."), - ("connection_refused", "Poco::Exception. Code: 1000, e.code() = 111, Connection refused"), + ( + "connection_refused", + "Poco::Exception. 
Code: 1000, e.code() = 111, Connection refused", + ), ], - ids=lambda x: x[0] + ids=lambda x: x[0], ) def test_when_error_is_retried(cluster, broken_s3, action_and_message): node = cluster.instances["node"] @@ -267,9 +271,7 @@ def test_when_error_is_retried(cluster, broken_s3, action_and_message): ) assert "Code: 499" in error, error - assert ( - message in error - ), error + assert message in error, error def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): From 1e5069b5dc6f07d7b29b3a94eaad1c15c9842635 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 22 May 2024 19:21:27 +0200 Subject: [PATCH 532/651] Fix duplicate include --- src/TableFunctions/ITableFunctionDataLake.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 6ad8689a9b4..fe6e5b3e593 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include From 7c9f36ad1ea1e6cc1d480c44a94c9e473f3a27e0 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 22 May 2024 19:46:08 +0200 Subject: [PATCH 533/651] Add gh to style-check dockerfile --- docker/test/style/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 5d53d03606f..172fbce6406 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ aspell \ curl \ git \ + gh \ file \ libxml2-utils \ moreutils \ From 6be79a35b6a55e88103056058ce9833ac62be77e Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 22 May 2024 20:30:19 +0200 Subject: [PATCH 534/651] update contrib/aws to the last head --- contrib/aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws b/contrib/aws index b7ae6e5bf48..eb96e740453 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit b7ae6e5bf48fb4981f24476bdd187cd35df1e2c6 +Subproject commit eb96e740453ae27afa1f367ba19f99bdcb38484d From 7ecfdbb3aaf4b7f4a68d6a332138dd90612e6120 Mon Sep 17 00:00:00 2001 From: Mikhail Artemenko Date: Wed, 22 May 2024 23:05:27 +0000 Subject: [PATCH 535/651] fix test_hdfsCluster_unset_skip_unavailable_shards --- tests/integration/test_storage_hdfs/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index bb72574c6e5..3c43918d8c0 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -895,7 +895,7 @@ def test_hdfsCluster_unset_skip_unavailable_shards(started_cluster): assert ( node1.query( - "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/skip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" + "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/unskip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" ) == data ) From c07c9d4c87efa2d4823526127bd52566773a2cd3 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 22 May 2024 21:57:43 -0300 Subject: [PATCH 536/651] test for #45804 --- ...l_and_prewhere_condition_ver_column.reference | 2 ++ ...1_final_and_prewhere_condition_ver_column.sql | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 
tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference create mode 100644 tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql diff --git a/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql new file mode 100644 index 00000000000..78a58a979d1 --- /dev/null +++ b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql @@ -0,0 +1,16 @@ +SET allow_experimental_analyzer = 1; + +-- https://github.com/ClickHouse/ClickHouse/issues/45804 + +CREATE TABLE myRMT( + key Int64, + someCol String, + ver DateTime +) ENGINE = ReplacingMergeTree(ver) +ORDER BY key as SELECT 1, 'test', '2020-01-01'; + +SELECT count(ver) FROM myRMT FINAL PREWHERE ver > '2000-01-01'; + +SELECT count() FROM myRMT FINAL PREWHERE ver > '2000-01-01'; + +DROP TABLE myRMT; From 88ae74f6fdd3d859674a588b8b6fba320d214950 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 09:28:38 +0200 Subject: [PATCH 537/651] Add test for reinterpretXYZ --- .../functions/type-conversion-functions.md | 3 +- .../03156_reinterpret_functions.sql | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03156_reinterpret_functions.sql diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 14a12ab5d5d..1030d92c76b 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1000,7 +1000,8 @@ Result: ## reinterpretAsInt(8\|16\|32\|64) -## reinterpretAsFloat(32\|64) +## reinterpretAsFloat* + ## reinterpretAsDate diff --git a/tests/queries/0_stateless/03156_reinterpret_functions.sql b/tests/queries/0_stateless/03156_reinterpret_functions.sql new file mode 100644 index 00000000000..4acaaf47cef --- /dev/null +++ b/tests/queries/0_stateless/03156_reinterpret_functions.sql @@ -0,0 +1,36 @@ +-- Date and DateTime + +SELECT reinterpretAsDate(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsDate('A',''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsDate([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT reinterpretAsDateTime(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsDateTime('A',''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsDateTime([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} + +SELECT reinterpretAsDate(65); +SELECT reinterpretAsDate('A'); +SELECT reinterpretAsDateTime(65); +SELECT reinterpretAsDate('A'); + +-- Fixed String + +SELECT reinterpretAsFixedString(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsFixedString(toDate('1970-01-01'),''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsFixedString([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} + +SELECT reinterpretAsFixedString(toDate('1970-03-07')); +SELECT reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')); +SELECT reinterpretAsFixedString(65); 
+
+-- Float32, Float64
+
+SELECT reinterpretAsFloat32(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT reinterpretAsFloat64(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT reinterpretAsFloat32('1970-01-01', ''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT reinterpretAsFloat64('1970-01-01', ''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT reinterpretAsFloat32([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT}
+SELECT reinterpretAsFloat64([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT}
+
+
+
+

From 9234beaff8ef19ed758984fb70c82b4edb3762f0 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Thu, 23 May 2024 09:32:43 +0200
Subject: [PATCH 538/651] Fix typo and move from other-functions to math-functions

---
 .../sql-reference/functions/math-functions.md | 46 +++++++++++++++++++
 .../functions/other-functions.md | 46 ------------------
 2 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md
index 945166056af..324adbfb4b3 100644
--- a/docs/en/sql-reference/functions/math-functions.md
+++ b/docs/en/sql-reference/functions/math-functions.md
@@ -947,3 +947,49 @@ Result:
 │                               11 │
 └──────────────────────────────────┘
 ```
+
+## proportionsZTest
+
+Returns test statistics for the two proportion Z-test - a statistical test for comparing the proportions from two populations `x` and `y`.
+
+**Syntax**
+
+```sql
+proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_type)
+```
+
+**Arguments**
+
+- `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md).
+- `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md).
+- `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md).
+- `trials_y`: Number of trials in population `y`. [UInt64](../data-types/int-uint.md).
+- `conf_level`: Confidence level for the test. [Float64](../data-types/float.md).
+- `pool_type`: Selection of pooling (way in which the standard error is estimated). Can be either `unpooled` or `pooled`. [String](../data-types/string.md).
+
+:::note
+For argument `pool_type`: In the pooled version, the two proportions are averaged, and only one proportion is used to estimate the standard error. In the unpooled version, the two proportions are used separately.
+:::
+
+**Returned value**
+
+- `z_stat`: Z statistic. [Float64](../data-types/float.md).
+- `p_val`: P value. [Float64](../data-types/float.md).
+- `ci_low`: The lower confidence interval. [Float64](../data-types/float.md).
+- `ci_high`: The upper confidence interval. [Float64](../data-types/float.md).
+ +**Example** + +Query: + +```sql +SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); +``` + +Result: + +```response +┌─proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')───────────────────────────────┐ +│ (-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) │ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 288432167bb..2b0215115cb 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -903,52 +903,6 @@ SELECT parseTimeDelta('1yr2mo') └──────────────────────────┘ ``` -## proportionsZTest - -Returns test statistics for the two proportion Z-test - a statistical test for comparing the proportions from two populations `x` and `y`. - -**Syntax** - -```sql -proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_type) -``` - -**Arguments** - -- `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). -- `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). -- `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). -- `trials_y`: Number of trials in population `y`. [UInt64](../data-types/int-uint.md). -- `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). -- `pool_type`: Selection of pooling (way in which the standard error is estimated). can be either `unpooled` or `pooled`. [String](../data-types/string.md). - -:::note -For argument `pool_type`: In the pooled version, the two proportions are averaged, and only one proportion is used to estimate the standard error. In the unpooled version, the two proportions are used separately. -::: - -**Returned value** - -- `z_stat`: Z statistic. [Float64](../data-types/float.md). -- `p_val`: P value. [Float64](../data-types/float.md). -- `ci_low`: The lower confidence interval. [Float64](../data-types/float.md). -- `ci_high`: The upper confidence interval. [Float64](../data-types/float.md). - -**Example** - -Query: - -```sql -SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); -``` - -Result: - -```response -┌─proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')───────────────────────────────┐ -│ (-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) │ -└────────────────────────────────────────────────────────────────────────────────────┘ -``` - ## least(a, b) Returns the smaller value of a and b. 
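For reference, the two estimators that the relocated `proportionsZTest` documentation distinguishes via `pool_type` correspond to the standard two-proportion z statistic. A sketch of the conventional formulas, with `s`/`n` standing in for the documented `successes`/`trials` arguments (standard statistics, not copied from ClickHouse source):

```latex
% Sample proportions, writing s for successes and n for trials:
%   \hat{p}_x = s_x / n_x,   \hat{p}_y = s_y / n_y
% Pooled: one combined proportion estimates the standard error.
\[
\hat{p} = \frac{s_x + s_y}{n_x + n_y}, \qquad
z_{\mathrm{pooled}} = \frac{\hat{p}_x - \hat{p}_y}
  {\sqrt{\hat{p}\,(1 - \hat{p})\left(\frac{1}{n_x} + \frac{1}{n_y}\right)}}
\]
% Unpooled: each population contributes its own variance term.
\[
z_{\mathrm{unpooled}} = \frac{\hat{p}_x - \hat{p}_y}
  {\sqrt{\frac{\hat{p}_x (1 - \hat{p}_x)}{n_x} + \frac{\hat{p}_y (1 - \hat{p}_y)}{n_y}}}
\]
```

Plugging the documented example `proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')` into the unpooled form gives z ≈ -0.2066, which agrees with the `z_stat` value shown in the example output above.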
From 45492baf440418267c8187607650a6ceddc061d3 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Thu, 23 May 2024 08:20:16 +0000 Subject: [PATCH 539/651] Restart Ci From a21377cf5131de31e2109c117774fdb8058e8bc9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 11:51:34 +0200 Subject: [PATCH 540/651] Update src/Analyzer/Passes/QueryAnalysisPass.cpp Co-authored-by: Dmitry Novik --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index cfea45732db..3ccecac951d 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -638,7 +638,10 @@ struct ScopeAliases auto it = alias_map.find(*key); - if (it == alias_map.end() && lookup.lookup_context == IdentifierLookupContext::TABLE_EXPRESSION) + if (it != alias_map.end()) + return &it->second; + + if (lookup.lookup_context == IdentifierLookupContext::TABLE_EXPRESSION) return {}; while (it == alias_map.end()) From 9d63095db9445f4963da914ddbc819b0a57bc7e2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 16 Apr 2024 12:55:50 +0000 Subject: [PATCH 541/651] Revert "Revert "Speed up `splitByRegexp`"" This reverts commit 08e5c2ba4d9620551b0de5791876d35888d2c81a. --- src/Functions/splitByRegexp.cpp | 66 ++++++++++++++++++- tests/performance/function_tokens.xml | 2 + .../01866_split_by_regexp.reference | 12 ++++ .../0_stateless/01866_split_by_regexp.sql | 17 +++++ 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 32afb813a04..e28fe9c38bb 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -1,9 +1,11 @@ #include +#include +#include #include #include -#include #include #include +#include #include @@ -102,7 +104,7 @@ public: return false; } - pos += 1; + ++pos; token_end = pos; ++splits; } @@ -148,11 +150,69 @@ public: using FunctionSplitByRegexp = FunctionTokens; +/// Fallback splitByRegexp to splitByChar when its 1st argument is a trivial char for better performance +class SplitByRegexpOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "splitByRegexp"; + static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } + + explicit SplitByRegexpOverloadResolver(ContextPtr context_) + : context(context_) + , split_by_regexp(FunctionSplitByRegexp::create(context)) {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return SplitByRegexpImpl::getNumberOfArguments(); } + bool isVariadic() const override { return SplitByRegexpImpl::isVariadic(); } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + if (patternIsTrivialChar(arguments)) + return FunctionFactory::instance().getImpl("splitByChar", context)->build(arguments); + else + return std::make_unique( + split_by_regexp, collections::map(arguments, [](const auto & elem) { return elem.type; }), return_type); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + return split_by_regexp->getReturnTypeImpl(arguments); + } + +private: + bool patternIsTrivialChar(const ColumnsWithTypeAndName & arguments) const + { + const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get()); + if (!col) + throw 
Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}. " + "Must be constant string.", + arguments[0].column->getName(), + getName()); + + String pattern = col->getValue(); + if (pattern.size() == 1) + { + OptimizedRegularExpression re = Regexps::createRegexp(pattern); + + std::string required_substring; + bool is_trivial; + bool required_substring_is_prefix; + re.getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix); + return is_trivial && required_substring == pattern; + } + return false; + } + + ContextPtr context; + FunctionPtr split_by_regexp; +}; } REGISTER_FUNCTION(SplitByRegexp) { - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/tests/performance/function_tokens.xml b/tests/performance/function_tokens.xml index 63b72f83df3..1ff56323d62 100644 --- a/tests/performance/function_tokens.xml +++ b/tests/performance/function_tokens.xml @@ -1,3 +1,5 @@ with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000) + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000) + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000) diff --git a/tests/queries/0_stateless/01866_split_by_regexp.reference b/tests/queries/0_stateless/01866_split_by_regexp.reference index a3ae2f35a5f..62939940545 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.reference +++ b/tests/queries/0_stateless/01866_split_by_regexp.reference @@ -5,3 +5,15 @@ ['gbye','bug'] [''] [] +Test fallback of splitByRegexp to splitByChar if regexp is trivial +['a','b','c'] +['a','b','c'] +['','','','','',''] +['a^b^c'] +['a$b$c'] +['a)b)c'] +['a','b','c'] +['a','b','c'] +['a','b','c'] +['a|b|c'] +['a\\b\\c'] diff --git a/tests/queries/0_stateless/01866_split_by_regexp.sql b/tests/queries/0_stateless/01866_split_by_regexp.sql index e472fb68d94..570bd1ba5c0 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.sql +++ b/tests/queries/0_stateless/01866_split_by_regexp.sql @@ -3,3 +3,20 @@ select splitByRegexp('', 'abcde'); select splitByRegexp('<[^<>]*>', x) from (select arrayJoin(['

<h1>hello</h1><h2>world</h2>

', 'gbyebug']) x); select splitByRegexp('ab', ''); select splitByRegexp('', ''); + +SELECT 'Test fallback of splitByRegexp to splitByChar if regexp is trivial'; +select splitByRegexp(' ', 'a b c'); +select splitByRegexp('-', 'a-b-c'); +select splitByRegexp('.', 'a.b.c'); +select splitByRegexp('^', 'a^b^c'); +select splitByRegexp('$', 'a$b$c'); +select splitByRegexp('+', 'a+b+c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp('?', 'a?b?c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp('(', 'a(b(c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp(')', 'a)b)c'); +select splitByRegexp('[', 'a[b[c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp(']', 'a]b]c'); +select splitByRegexp('{', 'a{b{c'); +select splitByRegexp('}', 'a}b}c'); +select splitByRegexp('|', 'a|b|c'); +select splitByRegexp('\\', 'a\\b\\c'); From 00bbffa6f056348a9252ca178edfee580a1939d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 23 May 2024 11:04:29 +0000 Subject: [PATCH 542/651] Update autogenerated version to 24.6.1.1 and contributors --- cmake/autogenerated_versions.txt | 10 +++---- .../StorageSystemContributors.generated.cpp | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index f8ff71876c6..dfbbb66a1e9 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54486) +SET(VERSION_REVISION 54487) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 5) +SET(VERSION_MINOR 6) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 6d4b31322d168356c8b10c43b4cef157c82337ff) -SET(VERSION_DESCRIBE v24.5.1.1-testing) -SET(VERSION_STRING 24.5.1.1) +SET(VERSION_GITHASH 70a1d3a63d47f0be077d67b8deb907230fc7cfb0) +SET(VERSION_DESCRIBE v24.6.1.1-testing) +SET(VERSION_STRING 24.6.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 909599c00af..b42b070d518 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -48,6 +48,7 @@ const char * auto_contributors[] { "Alex Cao", "Alex Cheng", "Alex Karo", + "Alex Katsman", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", @@ -101,6 +102,7 @@ const char * auto_contributors[] { "Alexey Korepanov", "Alexey Milovidov", "Alexey Perevyshin", + "Alexey Petrunyaka", "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", @@ -109,6 +111,7 @@ const char * auto_contributors[] { "AlfVII", "Alfonso Martinez", "Alfred Xu", + "Ali", "Ali Demirci", "Aliaksandr Pliutau", "Aliaksandr Shylau", @@ -250,6 +253,7 @@ const char * auto_contributors[] { "Brian Hunter", "Brokenice0415", "Bulat Gaifullin", + "Caio Ricciuti", "Camden Cheek", "Camilo Sierra", "Carbyn", @@ -384,6 +388,7 @@ const char * auto_contributors[] { "Evgenii Pravda", "Evgeniia Sudarikova", "Evgeniy Gatov", + "Evgeniy Leko", "Evgeniy Udodov", "Evgeny", "Evgeny Konkov", @@ -413,6 +418,7 @@ const char * auto_contributors[] { "Fille", "Flowyi", "Francisco Barón", + "Francisco Javier Jurado Moreno", "Frank Chen", "Frank Zhao", "François Violette", @@ -425,6 +431,7 @@ const char * auto_contributors[] { "G5.Qin", "Gabriel", "Gabriel Archer", + "Gabriel Martinez", "Gagan Arneja", "Gagan 
Goel", "Gao Qiang", @@ -446,6 +453,7 @@ const char * auto_contributors[] { "Grigory Buteyko", "Grigory Pervakov", "GruffGemini", + "Grégoire Pineau", "Guillaume Tassery", "Guo Wangyang", "Guo Wei (William)", @@ -587,6 +595,7 @@ const char * auto_contributors[] { "Keiji Yoshida", "Ken Chen", "Ken MacInnis", + "KenL", "Kenji Noguchi", "Kerry Clendinning", "Kevin Chiang", @@ -640,6 +649,7 @@ const char * auto_contributors[] { "Leonardo Maciel", "Leonid Krylov", "Leopold Schabel", + "Leticia Webb", "Lev Borodin", "Lewinma", "Li Shuai", @@ -701,6 +711,7 @@ const char * auto_contributors[] { "Masha", "Mathieu Rey", "Matthew Peveler", + "Mattias Naarttijärvi", "Matwey V. Kornilov", "Max", "Max Akhmedov", @@ -711,6 +722,7 @@ const char * auto_contributors[] { "MaxTheHuman", "MaxWk", "Maxim Akhmedov", + "Maxim Alexeev", "Maxim Babenko", "Maxim Fedotov", "Maxim Fridental", @@ -739,6 +751,7 @@ const char * auto_contributors[] { "Michael Razuvaev", "Michael Schnerring", "Michael Smitasin", + "Michael Stetsyuk", "Michail Safronov", "Michal Lisowski", "MicrochipQ", @@ -879,6 +892,7 @@ const char * auto_contributors[] { "Pavlo Bashynskiy", "Pawel Rog", "Paweł Kudzia", + "Pazitiff9", "Peignon Melvyn", "Peng Jian", "Peng Liu", @@ -1084,6 +1098,7 @@ const char * auto_contributors[] { "Tom Bombadil", "Tom Risse", "Tomas Barton", + "Tomer Shafir", "Tomáš Hromada", "Tristan", "Tsarkova Anastasia", @@ -1123,6 +1138,7 @@ const char * auto_contributors[] { "Victor Krasnov", "Victor Tarnavsky", "Viktor Taranenko", + "Vinay Suryadevara", "Vincent", "Vincent Bernat", "Vitalii S", @@ -1162,6 +1178,9 @@ const char * auto_contributors[] { "Vladislav Smirnov", "Vladislav V", "Vojtech Splichal", + "Volodya", + "Volodya Giro", + "Volodyachan", "Volodymyr Kuznetsov", "Vsevolod Orlov", "Vxider", @@ -1179,6 +1198,7 @@ const char * auto_contributors[] { "XenoAmess", "Xianda Ke", "Xiang Zhou", + "Xiaofei Hu", "Xin Wang", "Xoel Lopez Barata", "Xudong Zhang", @@ -1224,6 +1244,7 @@ const char * auto_contributors[] { "Zhipeng", "Zhuo Qiu", "Zijie Lu", + "Zimu Li", "Ziy1-Tan", "Zoran Pandovski", "[데이터플랫폼팀] 이호선", @@ -1490,6 +1511,7 @@ const char * auto_contributors[] { "jiyoungyoooo", "jktng", "jkuklis", + "joe09@foxmail.com", "joelynch", "johanngan", "johnnymatthews", @@ -1658,6 +1680,7 @@ const char * auto_contributors[] { "ongkong", "orantius", "p0ny", + "p1rattttt", "palasonicq", "palegre-tiny", "pawelsz-rb", @@ -1667,6 +1690,7 @@ const char * auto_contributors[] { "pedro.riera", "pengxiangcai", "peshkurov", + "pet74alex", "peter279k", "philip.han", "pingyu", @@ -1680,6 +1704,7 @@ const char * auto_contributors[] { "pyos", "pzhdfy", "qaziqarta", + "qiangxuhui", "qianlixiang", "qianmoQ", "qieqieplus", @@ -1793,6 +1818,7 @@ const char * auto_contributors[] { "unknown", "urgordeadbeef", "usurai", + "v01dxyz", "vahid-sohrabloo", "vdimir", "velavokr", @@ -1802,6 +1828,7 @@ const char * auto_contributors[] { "vic", "vicdashkov", "vicgao", + "vinay92-ch", "vinity", "vitac", "vitstn", @@ -1818,6 +1845,7 @@ const char * auto_contributors[] { "weeds085490", "whysage", "wineternity", + "woodlzm", "wuxiaobai24", "wxybear", "wzl", @@ -1877,6 +1905,7 @@ const char * auto_contributors[] { "zhenjial", "zhifeng", "zhongyuankai", + "zhou", "zhoubintao", "zhukai", "zimv", @@ -1891,6 +1920,7 @@ const char * auto_contributors[] { "zxealous", "zy-kkk", "zzsmdfj", + "zzyReal666", "Šimon Podlipský", "Александр", "Александр Нам", From 299f0886bfda27e375be3edf9042af513cbf99c8 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 23 May 2024 13:48:17 +0200 
Subject: [PATCH 543/651] Followup for #63691 --- src/Processors/Transforms/SquashingChunksTransform.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 267490dc89e..ed67dd508f3 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -71,7 +71,9 @@ Chunk SimpleSquashingChunksTransform::generate() if (squashed_chunk.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - return std::move(squashed_chunk); + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; } bool SimpleSquashingChunksTransform::canGenerate() @@ -83,7 +85,10 @@ Chunk SimpleSquashingChunksTransform::getRemaining() { Block current_block = squashing.add({}); squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); - return std::move(squashed_chunk); + + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; } } From f1c191a3cb2d2037de4346683fbc90a58a98a8a6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 13:48:23 +0200 Subject: [PATCH 544/651] Better --- .../ObjectStorage/Azure/Configuration.cpp | 4 ++++ .../ObjectStorage/ReadBufferIterator.cpp | 23 +++++++++++------- .../ObjectStorage/ReadBufferIterator.h | 3 ++- .../StorageObjectStorageSource.cpp | 20 +++++++--------- .../StorageObjectStorageSource.h | 5 ++-- src/Storages/S3Queue/S3QueueSource.cpp | 24 ++++++++++--------- 6 files changed, 44 insertions(+), 35 deletions(-) diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index cca94488a30..ada3e2e9323 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -100,6 +100,10 @@ AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(Contex settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); + settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; + settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; + settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; + settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; return settings_ptr; } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 5a8a4735fe1..50d69129883 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -35,9 +35,10 @@ ReadBufferIterator::ReadBufferIterator( format = configuration->format; } -SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path, const String & format_name) const +SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const { - auto source = std::filesystem::path(configuration->getDataSourceDescription()) / path; + chassert(!object_info.getPath().starts_with("/")); + auto source = std::filesystem::path(configuration->getDataSourceDescription()) / object_info.getPath(); 
return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); } @@ -50,6 +51,7 @@ SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const std::back_inserter(sources), [&](const auto & elem) { + chassert(!elem->getPath().starts_with("/")); return std::filesystem::path(configuration->getDataSourceDescription()) / elem->getPath(); }); return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); @@ -78,7 +80,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( if (format) { - auto cache_key = getKeyForSchemaCache(object_info->getPath(), *format); + const auto cache_key = getKeyForSchemaCache(*object_info, *format); if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) return columns; } @@ -89,7 +91,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( /// If we have such entry for some format, we can use this format to read the file. for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) { - auto cache_key = getKeyForSchemaCache(object_info->getPath(), format_name); + const auto cache_key = getKeyForSchemaCache(*object_info, format_name); if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) { /// Now format is known. It should be the same for all files. @@ -99,14 +101,13 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( } } } - return std::nullopt; } void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) { if (query_settings.schema_inference_use_cache) - schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->getPath(), *format), num_rows); + schema_cache.addNumRows(getKeyForSchemaCache(*current_object_info, *format), num_rows); } void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) @@ -114,7 +115,7 @@ void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) { - schema_cache.addColumns(getKeyForSchemaCache(current_object_info->getPath(), *format), columns); + schema_cache.addColumns(getKeyForSchemaCache(*current_object_info, *format), columns); } } @@ -135,7 +136,7 @@ void ReadBufferIterator::setFormatName(const String & format_name) String ReadBufferIterator::getLastFileName() const { if (current_object_info) - return current_object_info->getFileName(); + return current_object_info->getPath(); else return ""; } @@ -255,17 +256,21 @@ ReadBufferIterator::Data ReadBufferIterator::next() } } + LOG_TEST(getLogger("KSSENII"), "Will read columns from {}", current_object_info->getPath()); + std::unique_ptr read_buf; CompressionMethod compression_method; using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; if (const auto * object_info_in_archive = dynamic_cast(current_object_info.get())) { - compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); + LOG_TEST(getLogger("KSSENII"), "Will read columns from {} from archive", current_object_info->getPath()); + compression_method = chooseCompressionMethod(filename, configuration->compression_method); const auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else { + LOG_TEST(getLogger("KSSENII"), "Will read columns from {} from s3", current_object_info->getPath()); compression_method = 
chooseCompressionMethod(filename, configuration->compression_method); read_buf = object_storage->readObject( StoredObject(current_object_info->getPath()), diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 287e316e243..6eeb52ec2ed 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -13,6 +13,7 @@ public: using FileIterator = std::shared_ptr; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + using ObjectInfo = StorageObjectStorage::ObjectInfo; using ObjectInfos = StorageObjectStorage::ObjectInfos; ReadBufferIterator( @@ -41,7 +42,7 @@ public: std::unique_ptr recreateLastReadBuffer() override; private: - SchemaCache::Key getKeyForSchemaCache(const String & path, const String & format_name) const; + SchemaCache::Key getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const; SchemaCache::Keys getKeysForSchemaCache() const; std::optional tryGetColumnsFromCache( const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index a2b3ca5b69e..7332574b246 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -183,14 +183,14 @@ Chunk StorageObjectStorageSource::generate() VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, - fs::path(configuration->getNamespace()) / reader.getRelativePath(), + fs::path(configuration->getNamespace()) / reader.getObjectInfo().getPath(), object_info.metadata->size_bytes, &filename); return chunk; } if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); + addNumRowsToCache(reader.getObjectInfo(), total_rows_in_file); total_rows_in_file = 0; @@ -209,29 +209,28 @@ Chunk StorageObjectStorageSource::generate() return {}; } -void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t num_rows) +void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / path, + fs::path(configuration->getDataSourceDescription()) / object_info.getPath(), configuration->format, format_settings, getContext()); - schema_cache.addNumRows(cache_key, num_rows); } -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) +std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / object_info->getPath(), + fs::path(configuration->getDataSourceDescription()) / object_info.getPath(), configuration->format, format_settings, getContext()); auto get_last_mod_time = [&]() -> std::optional { - return object_info->metadata - ? std::optional(object_info->metadata->last_modified.epochTime()) + return object_info.metadata + ? 
std::optional(object_info.metadata->last_modified.epochTime()) : std::nullopt; }; return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); @@ -263,7 +262,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(object_info) + ? tryGetNumRowsFromCache(*object_info) : std::nullopt; if (num_rows_from_cache) @@ -505,7 +504,6 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne index = 0; - LOG_TEST(logger, "Filter: {}", filter_dag != nullptr); if (filter_dag) { std::vector paths; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 8dbb31fdfba..e9635ff4dce 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -94,7 +94,6 @@ protected: PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - std::string getRelativePath() const { return object_info->getPath(); } const ObjectInfo & getObjectInfo() const { return *object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } @@ -115,8 +114,8 @@ protected: std::future createReaderAsync(size_t processor = 0); std::unique_ptr createReadBuffer(const ObjectInfo & object_info); - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); + void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); + std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); void lazyInitialize(size_t processor); }; diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 458f681d7b5..c8aaece0711 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -238,12 +238,14 @@ Chunk StorageS3QueueSource::generate() key_with_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getObjectInfo().getPath(), *file_status, processed_rows_from_file, false); } break; } + const auto & path = reader.getObjectInfo().getPath(); + if (shutdown_called) { if (processed_rows_from_file == 0) @@ -253,7 +255,7 @@ Chunk StorageS3QueueSource::generate() { LOG_DEBUG( log, "Table is being dropped, {} rows are already processed from {}, but file is not fully processed", - processed_rows_from_file, reader.getRelativePath()); + processed_rows_from_file, path); try { @@ -265,7 +267,7 @@ Chunk StorageS3QueueSource::generate() key_with_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); + appendLogElement(path, *file_status, processed_rows_from_file, false); /// Leave the file half processed. Table is being dropped, so we do not care. break; @@ -273,7 +275,7 @@ Chunk StorageS3QueueSource::generate() LOG_DEBUG(log, "Shutdown called, but file {} is partially processed ({} rows). 
" "Will process the file fully and then shutdown", - reader.getRelativePath(), processed_rows_from_file); + path, processed_rows_from_file); } auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); @@ -287,31 +289,31 @@ Chunk StorageS3QueueSource::generate() Chunk chunk; if (reader->pull(chunk)) { - LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getRelativePath()); + LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), path); file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, reader.getRelativePath(), reader.getObjectInfo().metadata->size_bytes); + chunk, requested_virtual_columns, path, reader.getObjectInfo().metadata->size_bytes); return chunk; } } catch (...) { const auto message = getCurrentExceptionMessage(true); - LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getRelativePath(), message); + LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", path, message); files_metadata->setFileFailed(key_with_info->processing_holder, message); - appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); + appendLogElement(path, *file_status, processed_rows_from_file, false); throw; } files_metadata->setFileProcessed(key_with_info->processing_holder); - applyActionAfterProcessing(reader.getRelativePath()); + applyActionAfterProcessing(path); - appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, true); + appendLogElement(path, *file_status, processed_rows_from_file, true); file_status.reset(); processed_rows_from_file = 0; @@ -327,7 +329,7 @@ Chunk StorageS3QueueSource::generate() if (!reader) break; - file_status = files_metadata->getFileStatus(reader.getRelativePath()); + file_status = files_metadata->getFileStatus(reader.getObjectInfo().getPath()); /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
From c150c20512afef6ae816606f197b1ab0a2160712 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 23 May 2024 13:53:36 +0200 Subject: [PATCH 545/651] adjust tests in test_merge_tree_s3 --- tests/integration/test_merge_tree_s3/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 9216b08f942..0bf81e81383 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -857,9 +857,9 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli error = node.query_and_get_error( "OPTIMIZE TABLE test_merge_canceled_by_s3_errors FINAL", ) - assert "ExpectedError Message: mock s3 injected error" in error, error + assert "ExpectedError Message: mock s3 injected unretryable error" in error, error - node.wait_for_log_line("ExpectedError Message: mock s3 injected error") + node.wait_for_log_line("ExpectedError Message: mock s3 injected unretryable error") table_uuid = node.query( "SELECT uuid FROM system.tables WHERE database = 'default' AND name = 'test_merge_canceled_by_s3_errors' LIMIT 1" @@ -867,7 +867,7 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli node.query("SYSTEM FLUSH LOGS") error_count_in_blob_log = node.query( - f"SELECT count() FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' AND error like '%mock s3 injected error%'" + f"SELECT count() FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' AND error like '%mock s3 injected unretryable error%'" ).strip() assert int(error_count_in_blob_log) > 0, node.query( f"SELECT * FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' FORMAT PrettyCompactMonoBlock" @@ -911,7 +911,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): node.query("OPTIMIZE TABLE merge_canceled_by_s3_errors_when_move FINAL") - node.wait_for_log_line("ExpectedError Message: mock s3 injected error") + node.wait_for_log_line("ExpectedError Message: mock s3 injected unretryable error") count = node.query("SELECT count() FROM merge_canceled_by_s3_errors_when_move") assert int(count) == 2000, count From ce26c4f65746ec3058f1639f83b675feef4fda1c Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 13:54:45 +0200 Subject: [PATCH 546/651] =?UTF-8?q?Review=20changes=20and=20replace=20?= =?UTF-8?q?=E2=80=A6=20with=20...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../template-setting.md | 2 +- docs/changelogs/v20.7.1.4310-prestable.md | 2 +- docs/changelogs/v21.12.1.9017-prestable.md | 2 +- docs/changelogs/v21.3.3.14-lts.md | 2 +- docs/changelogs/v21.4.1.6422-prestable.md | 2 +- docs/changelogs/v21.4.2.10-prestable.md | 2 +- docs/changelogs/v22.6.1.1985-stable.md | 4 +- docs/changelogs/v22.7.1.2484-stable.md | 2 +- docs/changelogs/v22.8.13.20-lts.md | 2 +- docs/changelogs/v23.11.1.2711-stable.md | 2 +- docs/changelogs/v23.12.1.1368-stable.md | 2 +- docs/changelogs/v23.3.1.2823-lts.md | 2 +- docs/changelogs/v23.5.1.3174-stable.md | 2 +- docs/changelogs/v23.8.1.2992-lts.md | 2 +- docs/changelogs/v24.1.3.31-stable.md | 2 +- docs/changelogs/v24.2.1.2248-stable.md | 2 +- docs/changelogs/v24.3.1.2672-lts.md | 2 +- docs/en/development/style.md | 6 +- .../table-engines/integrations/hdfs.md | 2 +- .../engines/table-engines/integrations/s3.md | 2 +- .../custom-partitioning-key.md | 2 +- .../mergetree-family/mergetree.md | 4 +- 
.../table-engines/special/external-data.md | 2 +- .../operations/settings/query-complexity.md | 4 +- docs/en/operations/settings/settings.md | 2 +- .../parametric-functions.md | 4 +- .../reference/quantiles.md | 2 +- .../data-types/aggregatefunction.md | 4 +- .../sql-reference/data-types/fixedstring.md | 4 +- .../nested-data-structures/index.md | 2 +- .../data-types/simpleaggregatefunction.md | 2 +- .../functions/arithmetic-functions.md | 54 ++++++++++++ .../functions/array-functions.md | 84 +++++++++---------- .../functions/date-time-functions.md | 2 +- .../sql-reference/functions/json-functions.md | 24 +++--- .../functions/other-functions.md | 62 +------------- .../functions/string-replace-functions.md | 2 +- .../functions/string-search-functions.md | 12 +-- .../functions/tuple-functions.md | 6 +- .../functions/tuple-map-functions.md | 4 +- .../sql-reference/functions/url-functions.md | 2 +- .../sql-reference/statements/alter/comment.md | 2 +- .../sql-reference/statements/alter/delete.md | 2 +- .../sql-reference/statements/alter/index.md | 2 +- .../sql-reference/statements/alter/update.md | 2 +- .../en/sql-reference/statements/alter/view.md | 6 +- .../sql-reference/statements/create/view.md | 2 +- .../sql-reference/statements/insert-into.md | 2 +- .../sql-reference/statements/select/limit.md | 4 +- .../statements/select/order-by.md | 2 +- docs/en/sql-reference/table-functions/file.md | 2 +- docs/en/sql-reference/table-functions/gcs.md | 2 +- docs/en/sql-reference/table-functions/hdfs.md | 2 +- docs/en/sql-reference/table-functions/s3.md | 2 +- docs/ru/development/style.md | 8 +- .../table-engines/integrations/hdfs.md | 2 +- .../engines/table-engines/integrations/s3.md | 2 +- .../custom-partitioning-key.md | 2 +- .../mergetree-family/mergetree.md | 4 +- .../table-engines/special/external-data.md | 2 +- docs/ru/faq/general/olap.md | 6 +- .../example-datasets/nyc-taxi.md | 2 +- docs/ru/index.md | 12 +-- .../operations/settings/query-complexity.md | 4 +- docs/ru/operations/settings/settings.md | 2 +- .../parametric-functions.md | 4 +- .../reference/quantiles.md | 2 +- .../data-types/aggregatefunction.md | 4 +- .../sql-reference/data-types/fixedstring.md | 4 +- .../nested-data-structures/nested.md | 2 +- docs/ru/sql-reference/data-types/tuple.md | 2 +- .../functions/array-functions.md | 40 ++++----- .../functions/date-time-functions.md | 2 +- .../sql-reference/functions/json-functions.md | 24 +++--- .../functions/other-functions.md | 2 +- .../functions/string-functions.md | 2 +- .../functions/string-search-functions.md | 18 ++-- .../functions/tuple-functions.md | 6 +- .../sql-reference/functions/url-functions.md | 2 +- .../sql-reference/statements/alter/comment.md | 2 +- .../sql-reference/statements/alter/delete.md | 2 +- .../sql-reference/statements/alter/index.md | 2 +- .../sql-reference/statements/alter/update.md | 2 +- .../ru/sql-reference/statements/alter/view.md | 4 +- .../sql-reference/statements/create/view.md | 2 +- .../sql-reference/statements/insert-into.md | 2 +- docs/ru/sql-reference/table-functions/file.md | 2 +- docs/ru/sql-reference/table-functions/s3.md | 2 +- docs/zh/changelog/index.md | 4 +- docs/zh/development/style.md | 8 +- .../table-engines/integrations/hdfs.md | 2 +- .../engines/table-engines/integrations/s3.md | 4 +- .../custom-partitioning-key.md | 2 +- .../mergetree-family/mergetree.md | 4 +- .../table-engines/special/external-data.md | 2 +- docs/zh/faq/general/olap.md | 6 +- .../example-datasets/nyc-taxi.md | 2 +- .../example-datasets/uk-price-paid.mdx | 2 +- 
.../sparse-primary-indexes.md | 2 +- docs/zh/index.md | 12 +-- .../operations/settings/query-complexity.md | 4 +- docs/zh/operations/settings/settings.md | 2 +- .../operations/system-tables/dictionaries.md | 2 +- .../parametric-functions.md | 4 +- .../reference/quantiles.md | 2 +- .../data-types/aggregatefunction.md | 2 +- .../sql-reference/data-types/domains/index.md | 4 +- .../sql-reference/data-types/fixedstring.md | 4 +- .../nested-data-structures/nested.md | 2 +- .../data-types/simpleaggregatefunction.md | 2 +- docs/zh/sql-reference/data-types/tuple.md | 2 +- .../functions/array-functions.md | 40 ++++----- .../functions/date-time-functions.md | 2 +- .../functions/higher-order-functions.md | 22 ++--- .../sql-reference/functions/in-functions.md | 4 +- .../sql-reference/functions/json-functions.md | 24 +++--- .../functions/other-functions.md | 2 +- .../functions/string-functions.md | 6 +- .../functions/string-search-functions.md | 18 ++-- .../sql-reference/functions/url-functions.md | 2 +- .../sql-reference/statements/alter/delete.md | 2 +- .../sql-reference/statements/alter/index.md | 2 +- .../sql-reference/statements/alter/update.md | 2 +- .../zh/sql-reference/statements/alter/view.md | 4 +- .../sql-reference/statements/create/view.md | 2 +- .../sql-reference/statements/insert-into.md | 2 +- .../sql-reference/statements/select/limit.md | 4 +- .../statements/select/order-by.md | 2 +- docs/zh/sql-reference/table-functions/file.md | 2 +- docs/zh/sql-reference/table-functions/hdfs.md | 2 +- docs/zh/sql-reference/table-functions/s3.md | 2 +- 131 files changed, 384 insertions(+), 384 deletions(-) diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md index fc912aba3e1..f4525d872df 100644 --- a/docs/_description_templates/template-setting.md +++ b/docs/_description_templates/template-setting.md @@ -2,7 +2,7 @@ Description. -For the switch setting, use the typical phrase: “Enables or disables something …”. +For the switch setting, use the typical phrase: “Enables or disables something ...”. Possible values: diff --git a/docs/changelogs/v20.7.1.4310-prestable.md b/docs/changelogs/v20.7.1.4310-prestable.md index f47c7334228..aa1d993b263 100644 --- a/docs/changelogs/v20.7.1.4310-prestable.md +++ b/docs/changelogs/v20.7.1.4310-prestable.md @@ -166,4 +166,4 @@ * NO CL ENTRY: 'Revert "Abort on std::out_of_range in debug builds"'. [#12752](https://github.com/ClickHouse/ClickHouse/pull/12752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Bump protobuf from 3.12.2 to 3.12.4 in /docs/tools'. [#13102](https://github.com/ClickHouse/ClickHouse/pull/13102) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). * NO CL ENTRY: 'Merge [#12574](https://github.com/ClickHouse/ClickHouse/issues/12574)'. [#13158](https://github.com/ClickHouse/ClickHouse/pull/13158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer…"'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer..."'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
diff --git a/docs/changelogs/v21.12.1.9017-prestable.md b/docs/changelogs/v21.12.1.9017-prestable.md index 88b8260e312..bd84873e67a 100644 --- a/docs/changelogs/v21.12.1.9017-prestable.md +++ b/docs/changelogs/v21.12.1.9017-prestable.md @@ -421,5 +421,5 @@ sidebar_label: 2022 * Fix possible crash in DataTypeAggregateFunction [#32287](https://github.com/ClickHouse/ClickHouse/pull/32287) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Update backport.py [#32323](https://github.com/ClickHouse/ClickHouse/pull/32323) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix graphite-bench build [#32351](https://github.com/ClickHouse/ClickHouse/pull/32351) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Revert "graphite: split tagged/plain rollup rules (for merges perfoma… [#32376](https://github.com/ClickHouse/ClickHouse/pull/32376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "graphite: split tagged/plain rollup rules (for merges perfoma... [#32376](https://github.com/ClickHouse/ClickHouse/pull/32376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Another attempt to fix unit test Executor::RemoveTasksStress [#32390](https://github.com/ClickHouse/ClickHouse/pull/32390) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). diff --git a/docs/changelogs/v21.3.3.14-lts.md b/docs/changelogs/v21.3.3.14-lts.md index 57bde602f21..91d99deaa6b 100644 --- a/docs/changelogs/v21.3.3.14-lts.md +++ b/docs/changelogs/v21.3.3.14-lts.md @@ -18,4 +18,4 @@ sidebar_label: 2022 #### NOT FOR CHANGELOG / INSIGNIFICANT -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). diff --git a/docs/changelogs/v21.4.1.6422-prestable.md b/docs/changelogs/v21.4.1.6422-prestable.md index 2eadb0d4754..66937c3be15 100644 --- a/docs/changelogs/v21.4.1.6422-prestable.md +++ b/docs/changelogs/v21.4.1.6422-prestable.md @@ -223,7 +223,7 @@ sidebar_label: 2022 * Do not overlap zookeeper path for ReplicatedMergeTree in stateless *.sh tests [#21724](https://github.com/ClickHouse/ClickHouse/pull/21724) ([Azat Khuzhin](https://github.com/azat)). * make the fuzzer use sources from the CI [#21754](https://github.com/ClickHouse/ClickHouse/pull/21754) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add one more variant to memcpy benchmark [#21759](https://github.com/ClickHouse/ClickHouse/pull/21759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). * docs(fix): typo [#21775](https://github.com/ClickHouse/ClickHouse/pull/21775) ([Ali Demirci](https://github.com/depyronick)). * DDLWorker.cpp: fixed exceeded amount of tries typo [#21807](https://github.com/ClickHouse/ClickHouse/pull/21807) ([Eldar Nasyrov](https://github.com/3ldar-nasyrov)). * fix integration MaterializeMySQL test [#21819](https://github.com/ClickHouse/ClickHouse/pull/21819) ([TCeason](https://github.com/TCeason)). 
diff --git a/docs/changelogs/v21.4.2.10-prestable.md b/docs/changelogs/v21.4.2.10-prestable.md index 3db17ddfcf3..b9bdbd80c0c 100644 --- a/docs/changelogs/v21.4.2.10-prestable.md +++ b/docs/changelogs/v21.4.2.10-prestable.md @@ -226,7 +226,7 @@ sidebar_label: 2022 * Do not overlap zookeeper path for ReplicatedMergeTree in stateless *.sh tests [#21724](https://github.com/ClickHouse/ClickHouse/pull/21724) ([Azat Khuzhin](https://github.com/azat)). * make the fuzzer use sources from the CI [#21754](https://github.com/ClickHouse/ClickHouse/pull/21754) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add one more variant to memcpy benchmark [#21759](https://github.com/ClickHouse/ClickHouse/pull/21759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). * docs(fix): typo [#21775](https://github.com/ClickHouse/ClickHouse/pull/21775) ([Ali Demirci](https://github.com/depyronick)). * DDLWorker.cpp: fixed exceeded amount of tries typo [#21807](https://github.com/ClickHouse/ClickHouse/pull/21807) ([Eldar Nasyrov](https://github.com/3ldar-nasyrov)). * fix integration MaterializeMySQL test [#21819](https://github.com/ClickHouse/ClickHouse/pull/21819) ([TCeason](https://github.com/TCeason)). diff --git a/docs/changelogs/v22.6.1.1985-stable.md b/docs/changelogs/v22.6.1.1985-stable.md index c915d24fe00..7bd7038377a 100644 --- a/docs/changelogs/v22.6.1.1985-stable.md +++ b/docs/changelogs/v22.6.1.1985-stable.md @@ -160,7 +160,7 @@ sidebar_label: 2022 * fix toString error on DatatypeDate32. [#37775](https://github.com/ClickHouse/ClickHouse/pull/37775) ([LiuNeng](https://github.com/liuneng1994)). * The clickhouse-keeper setting `dead_session_check_period_ms` was transformed into microseconds (multiplied by 1000), which lead to dead sessions only being cleaned up after several minutes (instead of 500ms). [#37824](https://github.com/ClickHouse/ClickHouse/pull/37824) ([Michael Lex](https://github.com/mlex)). * Fix possible "No more packets are available" for distributed queries (in case of `async_socket_for_remote`/`use_hedged_requests` is disabled). [#37826](https://github.com/ClickHouse/ClickHouse/pull/37826) ([Azat Khuzhin](https://github.com/azat)). -* Do not drop the inner target table when executing `ALTER TABLE … MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). +* Do not drop the inner target table when executing `ALTER TABLE ... MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). * Fix directory ownership of coordination dir in clickhouse-keeper Docker image. Fixes [#37914](https://github.com/ClickHouse/ClickHouse/issues/37914). [#37915](https://github.com/ClickHouse/ClickHouse/pull/37915) ([James Maidment](https://github.com/jamesmaidment)). * Dictionaries fix custom query with update field and `{condition}`. Closes [#33746](https://github.com/ClickHouse/ClickHouse/issues/33746). [#37947](https://github.com/ClickHouse/ClickHouse/pull/37947) ([Maksim Kita](https://github.com/kitaisreal)). * Fix possible incorrect result of `SELECT ... 
WITH FILL` in the case when `ORDER BY` should be applied after `WITH FILL` result (e.g. for outer query). Incorrect result was caused by optimization for `ORDER BY` expressions ([#35623](https://github.com/ClickHouse/ClickHouse/issues/35623)). Closes [#37904](https://github.com/ClickHouse/ClickHouse/issues/37904). [#37959](https://github.com/ClickHouse/ClickHouse/pull/37959) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). @@ -180,7 +180,7 @@ sidebar_label: 2022 #### NO CL ENTRY * NO CL ENTRY: 'Revert "Fix mutations in tables with columns of type `Object`"'. [#37355](https://github.com/ClickHouse/ClickHouse/pull/37355) ([Alexander Tokmakov](https://github.com/tavplubix)). -* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool, and m…"'. [#37501](https://github.com/ClickHouse/ClickHouse/pull/37501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool, and m..."'. [#37501](https://github.com/ClickHouse/ClickHouse/pull/37501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Revert "Add support for preprocessing ZooKeeper operations in `clickhouse-keeper`"'. [#37534](https://github.com/ClickHouse/ClickHouse/pull/37534) ([Antonio Andelic](https://github.com/antonio2368)). * NO CL ENTRY: 'Revert "(only with zero-copy replication, non-production experimental feature not recommended to use) fix possible deadlock during fetching part"'. [#37545](https://github.com/ClickHouse/ClickHouse/pull/37545) ([Alexander Tokmakov](https://github.com/tavplubix)). * NO CL ENTRY: 'Revert "RFC: Fix converting types for UNION queries (may produce LOGICAL_ERROR)"'. [#37582](https://github.com/ClickHouse/ClickHouse/pull/37582) ([Dmitry Novik](https://github.com/novikd)). diff --git a/docs/changelogs/v22.7.1.2484-stable.md b/docs/changelogs/v22.7.1.2484-stable.md index 7464b0449ee..c4a76c66e0c 100644 --- a/docs/changelogs/v22.7.1.2484-stable.md +++ b/docs/changelogs/v22.7.1.2484-stable.md @@ -410,7 +410,7 @@ sidebar_label: 2022 * Add test for [#39132](https://github.com/ClickHouse/ClickHouse/issues/39132) [#39173](https://github.com/ClickHouse/ClickHouse/pull/39173) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Suppression for BC check (`Cannot parse string 'Hello' as UInt64`) [#39176](https://github.com/ClickHouse/ClickHouse/pull/39176) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix 01961_roaring_memory_tracking test [#39187](https://github.com/ClickHouse/ClickHouse/pull/39187) ([Dmitry Novik](https://github.com/novikd)). -* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on … [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). +* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on ... [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). * Fix exception in AsynchronousMetrics for s390x [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). * Optimize accesses to system.stack_trace (filter by name before sending signal) [#39212](https://github.com/ClickHouse/ClickHouse/pull/39212) ([Azat Khuzhin](https://github.com/azat)). 
* Enable warning "-Wdeprecated-dynamic-exception-spec" [#39213](https://github.com/ClickHouse/ClickHouse/pull/39213) ([Robert Schulze](https://github.com/rschu1ze)). diff --git a/docs/changelogs/v22.8.13.20-lts.md b/docs/changelogs/v22.8.13.20-lts.md index 0734f40bf3e..ad44fbfc5d6 100644 --- a/docs/changelogs/v22.8.13.20-lts.md +++ b/docs/changelogs/v22.8.13.20-lts.md @@ -20,4 +20,4 @@ sidebar_label: 2023 * Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un… [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). +* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un... [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v23.11.1.2711-stable.md b/docs/changelogs/v23.11.1.2711-stable.md index e32dee41dc7..0bdee08f5c9 100644 --- a/docs/changelogs/v23.11.1.2711-stable.md +++ b/docs/changelogs/v23.11.1.2711-stable.md @@ -217,7 +217,7 @@ sidebar_label: 2023 * S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). * Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). +* Fix Nullptr dereference in partial merge join with joined_subquery_re... [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). * Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). * Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). diff --git a/docs/changelogs/v23.12.1.1368-stable.md b/docs/changelogs/v23.12.1.1368-stable.md index 1a322ae9c0f..cb8ba57100e 100644 --- a/docs/changelogs/v23.12.1.1368-stable.md +++ b/docs/changelogs/v23.12.1.1368-stable.md @@ -272,7 +272,7 @@ sidebar_label: 2023 * Bump Azure to v1.6.0 [#58052](https://github.com/ClickHouse/ClickHouse/pull/58052) ([Robert Schulze](https://github.com/rschu1ze)). * Correct values for randomization [#58058](https://github.com/ClickHouse/ClickHouse/pull/58058) ([Anton Popov](https://github.com/CurtizJ)). 
* Non post request should be readonly [#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)). -* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test… [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). +* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test... [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). * fix typo in the test 02479 [#58072](https://github.com/ClickHouse/ClickHouse/pull/58072) ([Sema Checherinda](https://github.com/CheSema)). * Bump Azure to 1.7.2 [#58075](https://github.com/ClickHouse/ClickHouse/pull/58075) ([Robert Schulze](https://github.com/rschu1ze)). * Fix flaky test `02567_and_consistency` [#58076](https://github.com/ClickHouse/ClickHouse/pull/58076) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v23.3.1.2823-lts.md b/docs/changelogs/v23.3.1.2823-lts.md index 0c9be3601da..f81aba53ebe 100644 --- a/docs/changelogs/v23.3.1.2823-lts.md +++ b/docs/changelogs/v23.3.1.2823-lts.md @@ -520,7 +520,7 @@ sidebar_label: 2023 * Improve script for updating clickhouse-docs [#48135](https://github.com/ClickHouse/ClickHouse/pull/48135) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix stdlib compatibility issues [#48150](https://github.com/ClickHouse/ClickHouse/pull/48150) ([DimasKovas](https://github.com/DimasKovas)). * Make test test_disallow_concurrency less flaky [#48152](https://github.com/ClickHouse/ClickHouse/pull/48152) ([Vitaly Baranov](https://github.com/vitlibar)). -* Remove unused mockSystemDatabase from gtest_transform_query_for_exter… [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). +* Remove unused mockSystemDatabase from gtest_transform_query_for_exter... [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). * Update environmental-sensors.md [#48166](https://github.com/ClickHouse/ClickHouse/pull/48166) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Correctly handle NULL constants in logical optimizer for new analyzer [#48168](https://github.com/ClickHouse/ClickHouse/pull/48168) ([Antonio Andelic](https://github.com/antonio2368)). * Try making KeeperMap test more stable [#48170](https://github.com/ClickHouse/ClickHouse/pull/48170) ([Antonio Andelic](https://github.com/antonio2368)). diff --git a/docs/changelogs/v23.5.1.3174-stable.md b/docs/changelogs/v23.5.1.3174-stable.md index 2212eb6e893..4bdd4139afc 100644 --- a/docs/changelogs/v23.5.1.3174-stable.md +++ b/docs/changelogs/v23.5.1.3174-stable.md @@ -474,7 +474,7 @@ sidebar_label: 2023 * Fix flakiness of test_distributed_load_balancing test [#49921](https://github.com/ClickHouse/ClickHouse/pull/49921) ([Azat Khuzhin](https://github.com/azat)). * Add some logging [#49925](https://github.com/ClickHouse/ClickHouse/pull/49925) ([Kseniia Sumarokova](https://github.com/kssenii)). * Support hardlinking parts transactionally [#49931](https://github.com/ClickHouse/ClickHouse/pull/49931) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix for analyzer: 02377_ optimize_sorting_by_input_stream_properties_e… [#49943](https://github.com/ClickHouse/ClickHouse/pull/49943) ([Igor Nikonov](https://github.com/devcrafter)). 
+* Fix for analyzer: 02377_ optimize_sorting_by_input_stream_properties_e... [#49943](https://github.com/ClickHouse/ClickHouse/pull/49943) ([Igor Nikonov](https://github.com/devcrafter)). * Follow up to [#49429](https://github.com/ClickHouse/ClickHouse/issues/49429) [#49964](https://github.com/ClickHouse/ClickHouse/pull/49964) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix flaky test_ssl_cert_authentication to use urllib3 [#49982](https://github.com/ClickHouse/ClickHouse/pull/49982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). * Fix woboq codebrowser build with -Wno-poison-system-directories [#49992](https://github.com/ClickHouse/ClickHouse/pull/49992) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v23.8.1.2992-lts.md b/docs/changelogs/v23.8.1.2992-lts.md index 7c224b19350..05385d9c52b 100644 --- a/docs/changelogs/v23.8.1.2992-lts.md +++ b/docs/changelogs/v23.8.1.2992-lts.md @@ -272,7 +272,7 @@ sidebar_label: 2023 * Add more checks into ThreadStatus ctor. [#42019](https://github.com/ClickHouse/ClickHouse/pull/42019) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Refactor Query Tree visitor [#46740](https://github.com/ClickHouse/ClickHouse/pull/46740) ([Dmitry Novik](https://github.com/novikd)). * Revert "Revert "Randomize JIT settings in tests"" [#48282](https://github.com/ClickHouse/ClickHouse/pull/48282) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix outdated cache configuration in s3 tests: s3_storage_policy_by_defau… [#48424](https://github.com/ClickHouse/ClickHouse/pull/48424) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix outdated cache configuration in s3 tests: s3_storage_policy_by_defau... [#48424](https://github.com/ClickHouse/ClickHouse/pull/48424) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix IN with decimal in analyzer [#48754](https://github.com/ClickHouse/ClickHouse/pull/48754) ([vdimir](https://github.com/vdimir)). * Some unclear change in StorageBuffer::reschedule() for something [#49723](https://github.com/ClickHouse/ClickHouse/pull/49723) ([DimasKovas](https://github.com/DimasKovas)). * MergeTree & SipHash checksum big-endian support [#50276](https://github.com/ClickHouse/ClickHouse/pull/50276) ([ltrk2](https://github.com/ltrk2)). diff --git a/docs/changelogs/v24.1.3.31-stable.md b/docs/changelogs/v24.1.3.31-stable.md index 046ca451fbc..e898fba5c87 100644 --- a/docs/changelogs/v24.1.3.31-stable.md +++ b/docs/changelogs/v24.1.3.31-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings... [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). * Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). 
* Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). diff --git a/docs/changelogs/v24.2.1.2248-stable.md b/docs/changelogs/v24.2.1.2248-stable.md index 6113dd51ab1..02affe12c43 100644 --- a/docs/changelogs/v24.2.1.2248-stable.md +++ b/docs/changelogs/v24.2.1.2248-stable.md @@ -130,7 +130,7 @@ sidebar_label: 2024 * Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). * Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). * Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). -* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings... [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). * Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). * Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). diff --git a/docs/changelogs/v24.3.1.2672-lts.md b/docs/changelogs/v24.3.1.2672-lts.md index e5d008680a8..006ab941203 100644 --- a/docs/changelogs/v24.3.1.2672-lts.md +++ b/docs/changelogs/v24.3.1.2672-lts.md @@ -526,7 +526,7 @@ sidebar_label: 2024 * No "please" [#61916](https://github.com/ClickHouse/ClickHouse/pull/61916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Update version_date.tsv and changelogs after v23.12.6.19-stable [#61917](https://github.com/ClickHouse/ClickHouse/pull/61917) ([robot-clickhouse](https://github.com/robot-clickhouse)). * Update version_date.tsv and changelogs after v24.1.8.22-stable [#61918](https://github.com/ClickHouse/ClickHouse/pull/61918) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Fix flaky test_broken_projestions/test.py::test_broken_ignored_replic… [#61932](https://github.com/ClickHouse/ClickHouse/pull/61932) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky test_broken_projestions/test.py::test_broken_ignored_replic... [#61932](https://github.com/ClickHouse/ClickHouse/pull/61932) ([Kseniia Sumarokova](https://github.com/kssenii)). * Check is Rust avaiable for build, if not, suggest a way to disable Rust support [#61938](https://github.com/ClickHouse/ClickHouse/pull/61938) ([Azat Khuzhin](https://github.com/azat)). * CI: new ci menu in PR body [#61948](https://github.com/ClickHouse/ClickHouse/pull/61948) ([Max K.](https://github.com/maxknv)). * Remove flaky test `01193_metadata_loading` [#61961](https://github.com/ClickHouse/ClickHouse/pull/61961) ([Nikita Taranov](https://github.com/nickitat)). 
diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 77a550f2a0e..1444bc0e452 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -57,7 +57,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** Add spaces around binary operators (`+`, `-`, `*`, `/`, `%`, …) and the ternary operator `?:`. +**7.** Add spaces around binary operators (`+`, `-`, `*`, `/`, `%`, ...) and the ternary operator `?:`. ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -86,7 +86,7 @@ dst.ClickGoodEvent = click.GoodEvent; If necessary, the operator can be wrapped to the next line. In this case, the offset in front of it is increased. -**11.** Do not use a space to separate unary operators (`--`, `++`, `*`, `&`, …) from the argument. +**11.** Do not use a space to separate unary operators (`--`, `++`, `*`, `&`, ...) from the argument. **12.** Put a space after a comma, but not before it. The same rule goes for a semicolon inside a `for` expression. @@ -115,7 +115,7 @@ public: **16.** If the same `namespace` is used for the entire file, and there isn’t anything else significant, an offset is not necessary inside `namespace`. -**17.** If the block for an `if`, `for`, `while`, or other expression consists of a single `statement`, the curly brackets are optional. Place the `statement` on a separate line, instead. This rule is also valid for nested `if`, `for`, `while`, … +**17.** If the block for an `if`, `for`, `while`, or other expression consists of a single `statement`, the curly brackets are optional. Place the `statement` on a separate line, instead. This rule is also valid for nested `if`, `for`, `while`, ... But if the inner `statement` contains curly brackets or `else`, the external block should be written in curly brackets. diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index dbd1c270a4a..2749fa7e479 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -118,7 +118,7 @@ If the listing of files contains number ranges with leading zeros, use the const **Example** -Create table with files named `file000`, `file001`, … , `file999`: +Create table with files named `file000`, `file001`, ... , `file999`: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index dfa06801d04..cb1da1c8e68 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -178,7 +178,7 @@ If the listing of files contains number ranges with leading zeros, use the const **Example with wildcards 1** -Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Create table with files named `file-000.csv`, `file-001.csv`, ... 
, `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 23d98d4b20e..eda87fd06c1 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -71,7 +71,7 @@ WHERE table = 'visits' └───────────┴───────────────────┴────────┘ ``` -The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries. +The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER ... PARTITION](../../../sql-reference/statements/alter/partition.md) queries. The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7862eef69f8..a009c4a32f3 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -954,7 +954,7 @@ In the case of `MergeTree` tables, data is getting to disk in different ways: - As a result of an insert (`INSERT` query). - During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - When downloading from another replica. -- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). +- As a result of partition freezing [ALTER TABLE ... FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy: @@ -966,7 +966,7 @@ Under the hood, mutations and partition freezing make use of [hard links](https: In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file. Data is never transferred from the last one and into the first one. One may use system tables [system.part_log](/docs/en/operations/system-tables/part_log.md/#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](/docs/en/operations/system-tables/parts.md/#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs. -User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](/docs/en/sql-reference/statements/alter/partition.md/#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. 
User will get an error message if not enough free space is available or if any of the required conditions are not met. +User can force moving a part or a partition from one volume to another using the query [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](/docs/en/sql-reference/statements/alter/partition.md/#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met. Moving data does not interfere with data replication. Therefore, different storage policies can be specified for the same table on different replicas. diff --git a/docs/en/engines/table-engines/special/external-data.md b/docs/en/engines/table-engines/special/external-data.md index 7ea3f3e30d6..f6d6dae7eb6 100644 --- a/docs/en/engines/table-engines/special/external-data.md +++ b/docs/en/engines/table-engines/special/external-data.md @@ -29,7 +29,7 @@ Only a single table can be retrieved from stdin. The following parameters are optional: **–name**– Name of the table. If omitted, _data is used. **–format** – Data format in the file. If omitted, TabSeparated is used. -One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, … +One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, ... **–structure**– The table structure in the format`UserID UInt64`, `URL String`. Defines the column names and types. The files specified in ‘file’ will be parsed by the format specified in ‘format’, using the data types specified in ‘types’ or ‘structure’. The table will be uploaded to the server and accessible there as a temporary table with the name in ‘name’. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index d86f18ff982..2a20e74e20f 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -303,7 +303,7 @@ What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘ Limits the number of rows in the hash table that is used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. +This setting applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. If a query contains multiple joins, ClickHouse checks this setting for every intermediate result. @@ -320,7 +320,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This setting applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This setting applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result.
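As a side note on the two join limits touched in the hunk above: they are normally set per session or per query together with `join_overflow_mode`, which chooses between failing and truncating when a limit is hit. A minimal sketch, assuming hypothetical tables `t1` and `t2`:

```sql
-- Cap the in-memory hash table built for the right-hand side of the join,
-- and stop filling it (rather than throwing an exception) once the cap is hit.
SET max_rows_in_join = 1000000;
SET join_overflow_mode = 'break';

SELECT t1.id, t2.value
FROM t1
INNER JOIN t2 ON t1.id = t2.id;
```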
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 91b544c6a82..2b5cd11819a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2248,7 +2248,7 @@ Default value: 0. ## count_distinct_implementation {#count_distinct_implementation} -Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. +Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. Possible values: diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 8981ac1f752..1dc89b8dcf9 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -82,7 +82,7 @@ FROM In this case, you should remember that you do not know the histogram bin borders. -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) Checks whether the sequence contains an event chain that matches the pattern. @@ -172,7 +172,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) +## sequenceCount(pattern)(time, cond1, cond2, ...) Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index e2a5bc53e32..856d447ac13 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 ## quantiles -Syntax: `quantiles(level1, level2, …)(x)` +Syntax: `quantiles(level1, level2, ...)(x)` All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index 87511a505dc..37f0d0e50ae 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -6,9 +6,9 @@ sidebar_label: AggregateFunction # AggregateFunction -Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(…)` data type and stored in a table, usually, by means of [a materialized view](../../sql-reference/statements/create/view.md). The common way to produce an aggregate function state is by calling the aggregate function with the `-State` suffix. To get the final result of aggregation in the future, you must use the same aggregate function with the `-Merge`suffix. 
+Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(...)` data type and stored in a table, usually, by means of [a materialized view](../../sql-reference/statements/create/view.md). The common way to produce an aggregate function state is by calling the aggregate function with the `-State` suffix. To get the final result of aggregation in the future, you must use the same aggregate function with the `-Merge` suffix. -`AggregateFunction(name, types_of_arguments…)` — parametric data type. +`AggregateFunction(name, types_of_arguments...)` — parametric data type. **Parameters** diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index 0316df7fe34..0c021b28f74 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -21,8 +21,8 @@ The `FixedString` type is efficient when data has the length of precisely `N` by Examples of the values that can be efficiently stored in `FixedString`-typed columns: - The binary representation of IP addresses (`FixedString(16)` for IPv6). -- Language codes (ru_RU, en_US … ). -- Currency codes (USD, RUB … ). +- Language codes (ru_RU, en_US ... ). +- Currency codes (USD, RUB ... ). - Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). To store UUID values, use the [UUID](../../sql-reference/data-types/uuid.md) data type. diff --git a/docs/en/sql-reference/data-types/nested-data-structures/index.md b/docs/en/sql-reference/data-types/nested-data-structures/index.md index d118170cd39..579ee9bfa8b 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/index.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/index.md @@ -6,7 +6,7 @@ sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) # Nested -## Nested(name1 Type1, Name2 Type2, …) +## Nested(name1 Type1, Name2 Type2, ...) A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure. diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 39f8409c1e1..4fb74ac30e4 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -5,7 +5,7 @@ sidebar_label: SimpleAggregateFunction --- # SimpleAggregateFunction -`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we do not have to store and process any extra data.
+`SimpleAggregateFunction(name, types_of_arguments...)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we do not have to store and process any extra data. The common way to produce an aggregate function value is by calling the aggregate function with the [-SimpleState](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-simplestate) suffix. diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 6d95f3dc358..8b8527acfdf 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -140,6 +140,60 @@ Same as `intDiv` but returns zero when dividing by zero or when dividing a minim intDivOrZero(a, b) ``` +## isFinite + +Returns 1 if the Float32 or Float64 argument is not infinite and not a NaN, otherwise this function returns 0. + +**Syntax** + +```sql +isFinite(x) +``` + +## isInfinite + +Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. + +**Syntax** + +```sql +isInfinite(x) +``` + +## ifNotFinite + +Checks whether a floating point value is finite. + +**Syntax** + +```sql +ifNotFinite(x,y) +``` + +**Arguments** + +- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). +- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). + +**Returned value** + +- `x` if `x` is finite. +- `y` if `x` is not finite. + +**Example** + +Query: + + SELECT 1/0 as infimum, ifNotFinite(infimum,42) + +Result: + + ┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ + │ inf │ 42 │ + └─────────┴───────────────────────────────┘ + +You can get a similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. + ## modulo Calculates the remainder of the division of two values `a` by `b`. diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 87e733a4b0c..f929ea00b8b 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -561,7 +561,7 @@ Result: └─────────────┴─────────────┴────────────────┴─────────────────┘ ``` -## array(x1, …), operator \[x1, …\] +## array(x1, ...), operator \[x1, ...\] Creates an array from the function arguments. The arguments must be constants and have types that have the smallest common type. At least one argument must be passed, because otherwise it isn’t clear which type of array to create. That is, you can’t use this function to create an empty array (to do that, use the ‘emptyArray\*’ function described above). @@ -768,9 +768,9 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Elements set to `NULL` are handled as normal values. -## arrayCount(\[func,\] arr1, …)
-Returns the number of elements for which `func(arr1[i], …, arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array. +Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array. Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. @@ -847,7 +847,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) -Returns the array \[1, 2, 3, …, length (arr) \] +Returns the array \[1, 2, 3, ..., length (arr) \] This function is normally used with ARRAY JOIN. It allows counting something just once for each array after applying ARRAY JOIN. Example: @@ -887,7 +887,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) This function can also be used in higher-order functions. For example, you can use it to get array indexes for elements that match a condition. -## arrayEnumerateUniq(arr, …) +## arrayEnumerateUniq(arr, ...) Returns an array the same size as the source array, indicating for each element what its position is among elements with the same value. For example: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\]. @@ -1206,7 +1206,7 @@ Result: └───────────────────┘ ``` -## arraySort(\[func,\] arr, …) {#sort} +## arraySort(\[func,\] arr, ...) {#sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -1307,11 +1307,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. ::: -## arrayPartialSort(\[func,\] limit, arr, …) +## arrayPartialSort(\[func,\] limit, arr, ...) Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. -## arrayReverseSort(\[func,\] arr, …) {#reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#reverse-sort} Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -1412,7 +1412,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayPartialReverseSort(\[func,\] limit, arr, …) +## arrayPartialReverseSort(\[func,\] limit, arr, ...) Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. 
Remaining elements `(limit..N]` shall contain elements in unspecified order. @@ -1535,7 +1535,7 @@ Result: [3,9,1,4,5,6,7,8,2,10] ``` -## arrayUniq(arr, …) +## arrayUniq(arr, ...) If one argument is passed, it counts the number of different elements in the array. If multiple arguments are passed, it counts the number of different tuples of elements at corresponding positions in multiple arrays. @@ -2079,9 +2079,9 @@ Result: └───────────────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) +## arrayMap(func, arr1, ...) -Returns an array obtained from the original arrays by application of `func(arr1[i], …, arrN[i])` for each element. Arrays `arr1` … `arrN` must have the same number of elements. +Returns an array obtained from the original arrays by application of `func(arr1[i], ..., arrN[i])` for each element. Arrays `arr1` ... `arrN` must have the same number of elements. Examples: @@ -2109,9 +2109,9 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res Note that the `arrayMap` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayFilter(func, arr1, …) +## arrayFilter(func, arr1, ...) -Returns an array containing only the elements in `arr1` for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns an array containing only the elements in `arr1` for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Examples: @@ -2142,9 +2142,9 @@ SELECT Note that the `arrayFilter` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayFill(func, arr1, …) +## arrayFill(func, arr1, ...) -Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func(arr1[i], …, arrN[i])` returns 0. The first element of `arr1` will not be replaced. +Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func(arr1[i], ..., arrN[i])` returns 0. The first element of `arr1` will not be replaced. Examples: @@ -2160,9 +2160,9 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Note that the `arrayFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayReverseFill(func, arr1, …) +## arrayReverseFill(func, arr1, ...) -Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func(arr1[i], …, arrN[i])` returns 0. The last element of `arr1` will not be replaced. +Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func(arr1[i], ..., arrN[i])` returns 0. The last element of `arr1` will not be replaced. Examples: @@ -2178,9 +2178,9 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Note that the `arrayReverseFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arraySplit(func, arr1, …) +## arraySplit(func, arr1, ...) -Split `arr1` into multiple arrays. When `func(arr1[i], …, arrN[i])` returns something other than 0, the array will be split on the left hand side of the element. 
The array will not be split before the first element. +Split `arr1` into multiple arrays. When `func(arr1[i], ..., arrN[i])` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. Examples: @@ -2196,9 +2196,9 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the `arraySplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayReverseSplit(func, arr1, …) +## arrayReverseSplit(func, arr1, ...) -Split `arr1` into multiple arrays. When `func(arr1[i], …, arrN[i])` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. +Split `arr1` into multiple arrays. When `func(arr1[i], ..., arrN[i])` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. Examples: @@ -2214,30 +2214,30 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the `arrayReverseSplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayExists(\[func,\] arr1, …) +## arrayExists(\[func,\] arr1, ...) -Returns 1 if there is at least one element in `arr` for which `func(arr1[i], …, arrN[i])` returns something other than 0. Otherwise, it returns 0. +Returns 1 if there is at least one element in `arr` for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Otherwise, it returns 0. Note that the `arrayExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayAll(\[func,\] arr1, …) +## arrayAll(\[func,\] arr1, ...) -Returns 1 if `func(arr1[i], …, arrN[i])` returns something other than 0 for all the elements in arrays. Otherwise, it returns 0. +Returns 1 if `func(arr1[i], ..., arrN[i])` returns something other than 0 for all the elements in arrays. Otherwise, it returns 0. Note that the `arrayAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayFirst(func, arr1, …) +## arrayFirst(func, arr1, ...) -Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. ## arrayFirstOrNull -Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise it returns `NULL`. +Returns the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0, otherwise it returns `NULL`. **Syntax** ```sql -arrayFirstOrNull(func, arr1, …) +arrayFirstOrNull(func, arr1, ...) ``` **Parameters** @@ -2292,20 +2292,20 @@ Result: \N ``` -## arrayLast(func, arr1, …) +## arrayLast(func, arr1, ...) -Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. 
Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. ## arrayLastOrNull -Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise returns `NULL`. +Returns the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0, otherwise returns `NULL`. **Syntax** ```sql -arrayLastOrNull(func, arr1, …) +arrayLastOrNull(func, arr1, ...) ``` **Parameters** @@ -2348,15 +2348,15 @@ Result: \N ``` -## arrayFirstIndex(func, arr1, …) +## arrayFirstIndex(func, arr1, ...) -Returns the index of the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the index of the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayLastIndex(func, arr1, …) +## arrayLastIndex(func, arr1, ...) -Returns the index of the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the index of the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayLastIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. @@ -2580,9 +2580,9 @@ Result: └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) +## arrayCumSum(\[func,\] arr1, ...) -Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. +Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], ..., arrN[i])`. **Syntax** @@ -2614,9 +2614,9 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res Note that the `arrayCumSum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayCumSumNonNegative(\[func,\] arr1, …) +## arrayCumSumNonNegative(\[func,\] arr1, ...) -Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. +Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], ..., arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. 
**Syntax** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 843f22e5a6f..1a56691ffc0 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1499,7 +1499,7 @@ This function returns the week number for date or datetime. The two-argument for The following table describes how the mode argument works. -| Mode | First day of week | Range | Week 1 is the first week … | +| Mode | First day of week | Range | Week 1 is the first week ... | |------|-------------------|-------|-------------------------------| | 0 | Sunday | 0-53 | with a Sunday in this year | | 1 | Monday | 0-53 | with 4 or more days this year | diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index e920ab82988..ba72b3cc6ed 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -386,7 +386,7 @@ SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 SELECT isValidJSON('not a json') = 0 ``` -## JSONHas(json\[, indices_or_keys\]…) +## JSONHas(json\[, indices_or_keys\]...) If the value exists in the JSON document, `1` will be returned. @@ -419,7 +419,7 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' ``` -## JSONLength(json\[, indices_or_keys\]…) +## JSONLength(json\[, indices_or_keys\]...) Return the length of a JSON array or a JSON object. @@ -432,7 +432,7 @@ SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 ``` -## JSONType(json\[, indices_or_keys\]…) +## JSONType(json\[, indices_or_keys\]...) Return the type of a JSON value. @@ -446,13 +446,13 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' ``` -## JSONExtractUInt(json\[, indices_or_keys\]…) +## JSONExtractUInt(json\[, indices_or_keys\]...) -## JSONExtractInt(json\[, indices_or_keys\]…) +## JSONExtractInt(json\[, indices_or_keys\]...) -## JSONExtractFloat(json\[, indices_or_keys\]…) +## JSONExtractFloat(json\[, indices_or_keys\]...) -## JSONExtractBool(json\[, indices_or_keys\]…) +## JSONExtractBool(json\[, indices_or_keys\]...) Parses a JSON and extract a value. These functions are similar to `visitParam` functions. @@ -466,7 +466,7 @@ SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200 SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 ``` -## JSONExtractString(json\[, indices_or_keys\]…) +## JSONExtractString(json\[, indices_or_keys\]...) Parses a JSON and extract a string. This function is similar to `visitParamExtractString` functions. @@ -484,7 +484,7 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = '' SELECT JSONExtractString('{"abc":"hello}', 'abc') = '' ``` -## JSONExtract(json\[, indices_or_keys…\], Return_type) +## JSONExtract(json\[, indices_or_keys...\], Return_type) Parses a JSON and extract a value of the given ClickHouse data type. 
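A quick sketch of the `JSONExtract` call shape documented in the hunk above (the JSON literal is the same sample string the surrounding docs use; the target type here is chosen purely for illustration):

```sql
-- Extract the array under key 'b' and convert it to a concrete ClickHouse type;
-- the last argument names the return type.
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Float64)') AS b;
-- Expected: [-100,200,300]
```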
@@ -506,7 +506,7 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' ``` -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) Parses key-value pairs from a JSON where the values are of the given ClickHouse data type. @@ -554,7 +554,7 @@ text └────────────────────────────────────────────────────────────┘ ``` -## JSONExtractRaw(json\[, indices_or_keys\]…) +## JSONExtractRaw(json\[, indices_or_keys\]...) Returns a part of JSON as unparsed string. @@ -566,7 +566,7 @@ Example: SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'; ``` -## JSONExtractArrayRaw(json\[, indices_or_keys…\]) +## JSONExtractArrayRaw(json\[, indices_or_keys...\]) Returns an array with elements of JSON array, each represented as unparsed string. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 5b77f16027b..4501d1f43d3 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -172,7 +172,7 @@ Result: ## visibleWidth Calculates the approximate width when outputting values to the console in text format (tab-separated). -This function is used by the system to implement [Pretty formats](../formats.mdx). +This function is used by the system to implement [Pretty formats](../../interfaces/formats.md). `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. @@ -335,7 +335,7 @@ The argument is internally still evaluated. Useful e.g. for benchmarks. **Syntax** ```sql -ignore(…) +ignore(x) ``` ## sleep @@ -541,60 +541,6 @@ Result: └────────────────────┘ ``` -## isFinite - -Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. - -**Syntax** - -```sql -isFinite(x) -``` - -## isInfinite - -Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. - -**Syntax** - -```sql -isInfinite(x) -``` - -## ifNotFinite - -Checks whether a floating point value is finite. - -**Syntax** - -```sql -ifNotFinite(x,y) -``` - -**Arguments** - -- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). - -**Returned value** - -- `x` if `x` is finite. -- `y` if `x` is not finite. - -**Example** - -Query: - - SELECT 1/0 as infimum, ifNotFinite(infimum,42) - -Result: - - ┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ - │ inf │ 42 │ - └─────────┴───────────────────────────────┘ - -You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. - ## isNaN Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0. @@ -2303,7 +2249,7 @@ Accepts a path to a catboost model and model arguments (features). Returns Float **Syntax** ```sql -catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) +catboostEvaluate(path_to_model, feature_1, feature_2, ..., feature_n) ``` **Example** @@ -2351,7 +2297,7 @@ Throw an exception if argument `x` is true. 
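A minimal usage sketch (the `message` and `error_code` arguments are optional, as the signature below shows):

```sql
-- Aborts the scan with the custom message as soon as the condition is true
SELECT throwIf(number = 3, 'Too many') FROM numbers(10);
```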
**Syntax** ```sql -throwIf(x\[, message\[, error_code\]\]) +throwIf(x[, message[, error_code]]) ``` **Arguments** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 0b761b62006..0e183626555 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -139,7 +139,7 @@ Format the `pattern` string with the values (strings, integers, etc.) listed in **Syntax** ```sql -format(pattern, s0, s1, …) +format(pattern, s0, s1, ...) ``` **Example** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 9738c19bf3c..a6eb4a4ceff 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -799,7 +799,7 @@ If you only want to search multiple substrings in a string, you can use function **Syntax** ```sql -multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAnyIndex @@ -809,7 +809,7 @@ Like `multiMatchAny` but returns any index that matches the haystack. **Syntax** ```sql -multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAllIndices @@ -819,7 +819,7 @@ Like `multiMatchAny` but returns the array of all indices that match the haystac **Syntax** ```sql -multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAny @@ -833,7 +833,7 @@ Like `multiMatchAny` but returns 1 if any pattern matches the haystack within a **Syntax** ```sql -multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAnyIndex @@ -843,7 +843,7 @@ Like `multiFuzzyMatchAny` but returns any index that matches the haystack within **Syntax** ```sql -multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAllIndices @@ -853,7 +853,7 @@ Like `multiFuzzyMatchAny` but returns the array of all indices in any order that **Syntax** ```sql -multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## extract diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 64b1732597f..c2219bb3f90 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -7,15 +7,15 @@ sidebar_label: Tuples ## tuple A function that allows grouping multiple columns. -For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function. +For columns with the types T1, T2, ..., it returns a Tuple(T1, T2, ...) type tuple containing these columns. There is no cost to execute the function. Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. 
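For instance, a minimal sketch of grouping two columns into one value (the reported type assumes ClickHouse's default literal inference):

```sql
SELECT tuple(1, 'a') AS t, toTypeName(t) AS type; -- (1,'a'), Tuple(UInt8, String)
```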
-The function implements the operator `(x, y, …)`. +The function implements the operator `(x, y, ...)`. **Syntax** ``` sql -tuple(x, y, …) +tuple(x, y, ...) ``` ## tupleElement diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 377283bc006..6386b4d5b1d 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -589,7 +589,7 @@ mapApply(func, map) **Returned value** -- Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element. +- Returns a map obtained from the original map by application of `func(map1[i], ..., mapN[i])` for each element. **Example** @@ -629,7 +629,7 @@ mapFilter(func, map) **Returned value** -- Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0. +- Returns a map containing only the elements in `map` for which `func(map1[i], ..., mapN[i])` returns something other than 0. **Example** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index a0b0170721c..6da82e689a9 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -16,7 +16,7 @@ If the relevant part isn’t present in a URL, an empty string is returned. Extracts the protocol from a URL. -Examples of typical returned values: http, https, ftp, mailto, tel, magnet… +Examples of typical returned values: http, https, ftp, mailto, tel, magnet... ### domain diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md index f6fb179d969..320828f0de9 100644 --- a/docs/en/sql-reference/statements/alter/comment.md +++ b/docs/en/sql-reference/statements/alter/comment.md @@ -4,7 +4,7 @@ sidebar_position: 51 sidebar_label: COMMENT --- -# ALTER TABLE … MODIFY COMMENT +# ALTER TABLE ... MODIFY COMMENT Adds, modifies, or removes comment to the table, regardless if it was set before or not. Comment change is reflected in both [system.tables](../../../operations/system-tables/tables.md) and `SHOW CREATE TABLE` query. diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index b6f45b67d52..af56bec7a11 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE Statement +# ALTER TABLE ... DELETE Statement ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 7961315c193..3cfb99cff83 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -42,7 +42,7 @@ These `ALTER` statements modify entities related to role-based access control: ## Mutations -`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). 
They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts.
+`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE ... DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE ... UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that produce new “mutated” versions of parts.
For `*MergeTree` tables mutations execute by **rewriting whole data parts**. There is no atomicity - parts are substituted for mutated parts as soon as they are ready and a `SELECT` query that started executing during a mutation will see data from parts that have already been mutated along with data from parts that have not been mutated yet.
diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md
index ab7d0ca7378..0b300e5849a 100644
--- a/docs/en/sql-reference/statements/alter/update.md
+++ b/docs/en/sql-reference/statements/alter/update.md
@@ -4,7 +4,7 @@ sidebar_position: 40
sidebar_label: UPDATE
---
-# ALTER TABLE … UPDATE Statements
+# ALTER TABLE ... UPDATE Statements
``` sql
ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md
index e063b27424e..83e8e9311b4 100644
--- a/docs/en/sql-reference/statements/alter/view.md
+++ b/docs/en/sql-reference/statements/alter/view.md
@@ -4,9 +4,9 @@ sidebar_position: 50
sidebar_label: VIEW
---
-# ALTER TABLE … MODIFY QUERY Statement
+# ALTER TABLE ... MODIFY QUERY Statement
-You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process.
+You can modify the `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE ... MODIFY QUERY` statement without interrupting the ingestion process.
This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause.
@@ -198,6 +198,6 @@ SELECT * FROM mv;
`ALTER LIVE VIEW ... REFRESH` statement refreshes a [Live view](../create/view.md#live-view). See [Force Live View Refresh](../create/view.md#live-view-alter-refresh).
-## ALTER TABLE … MODIFY REFRESH Statement
+## ALTER TABLE ... MODIFY REFRESH Statement
`ALTER TABLE ... MODIFY REFRESH` statement changes refresh parameters of a [Refreshable Materialized View](../create/view.md#refreshable-materialized-view). See [Changing Refresh Parameters](../create/view.md#changing-refresh-parameters).
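A hedged end-to-end sketch of the `MODIFY QUERY` workflow; the table and view names (`src`, `dst`, `mv`) are hypothetical:

```sql
-- The view writes into an explicit target table (TO dst),
-- so its SELECT can be swapped without touching the stored data.
CREATE TABLE src (a UInt64, b UInt64) ENGINE = MergeTree ORDER BY a;
CREATE TABLE dst (a UInt64, b UInt64) ENGINE = MergeTree ORDER BY a;
CREATE MATERIALIZED VIEW mv TO dst AS SELECT a, 0 AS b FROM src;

ALTER TABLE mv MODIFY QUERY SELECT a, b FROM src;
```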
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 073a3c0d246..b526c94e508 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -306,7 +306,7 @@ CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTE
Note that elements emitted by a late firing should be treated as updated results of a previous computation. Instead of firing at the end of windows, the window view will fire immediately when the late event arrives. Thus, it will result in multiple outputs for the same window. Users need to take these duplicated results into account or deduplicate them.
-You can modify `SELECT` query that was specified in the window view by using `ALTER TABLE … MODIFY QUERY` statement. The data structure resulting in a new `SELECT` query should be the same as the original `SELECT` query when with or without `TO [db.]name` clause. Note that the data in the current window will be lost because the intermediate state cannot be reused.
+You can modify the `SELECT` query that was specified in the window view by using the `ALTER TABLE ... MODIFY QUERY` statement. The data structure of the new `SELECT` query should be the same as the original `SELECT` query, with or without the `TO [db.]name` clause. Note that the data in the current window will be lost because the intermediate state cannot be reused.
### Monitoring New Windows
diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index a76692cf291..f3dadabd25f 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -73,7 +73,7 @@ Data can be passed to the INSERT in any [format](../../interfaces/formats.md#for
INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set
```
-For example, the following query format is identical to the basic version of INSERT … VALUES:
+For example, the following query format is identical to the basic version of INSERT ... VALUES:
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ...
diff --git a/docs/en/sql-reference/statements/select/limit.md b/docs/en/sql-reference/statements/select/limit.md
index d61a5a44b58..58fdf988bf3 100644
--- a/docs/en/sql-reference/statements/select/limit.md
+++ b/docs/en/sql-reference/statements/select/limit.md
@@ -17,11 +17,11 @@ If there is no [ORDER BY](../../../sql-reference/statements/select/order-by.md)
The number of rows in the result set can also depend on the [limit](../../../operations/settings/settings.md#limit) setting.
:::
-## LIMIT … WITH TIES Modifier
+## LIMIT ... WITH TIES Modifier
When you set `WITH TIES` modifier for `LIMIT n[,m]` and specify `ORDER BY expr_list`, you will get in result first `n` or `n,m` rows and all rows with same `ORDER BY` fields values equal to row at position `n` for `LIMIT n` and `m` for `LIMIT n,m`.
-This modifier also can be combined with [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill).
+This modifier can also be combined with [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill).
For example, the following query
diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md
index d6432a7b4f8..512a58d7cd9 100644
--- a/docs/en/sql-reference/statements/select/order-by.md
+++ b/docs/en/sql-reference/statements/select/order-by.md
@@ -283,7 +283,7 @@ In `MaterializedView`-engine tables the optimization works with views like `SELE
## ORDER BY Expr WITH FILL Modifier
-This modifier also can be combined with [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties).
+This modifier can also be combined with [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties).
`WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters.
All missed values of `expr` column will be filled sequentially and other columns will be filled as defaults.
diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md
index 3a63811add6..f66178afbb2 100644
--- a/docs/en/sql-reference/table-functions/file.md
+++ b/docs/en/sql-reference/table-functions/file.md
@@ -169,7 +169,7 @@ If your listing of files contains number ranges with leading zeros, use the cons
**Example**
-Query the total number of rows in files named `file000`, `file001`, … , `file999`:
+Query the total number of rows in files named `file000`, `file001`, ... , `file999`:
``` sql
SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32');
diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md
index 80077ecdb33..b891d88df31 100644
--- a/docs/en/sql-reference/table-functions/gcs.md
+++ b/docs/en/sql-reference/table-functions/gcs.md
@@ -130,7 +130,7 @@ FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefi
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::
-Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
+Count the total number of rows in files named `file-000.csv`, `file-001.csv`, ... , `file-999.csv`:
``` sql
SELECT count(*)
diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md
index 92f904b8841..d65615e7588 100644
--- a/docs/en/sql-reference/table-functions/hdfs.md
+++ b/docs/en/sql-reference/table-functions/hdfs.md
@@ -85,7 +85,7 @@ If your listing of files contains number ranges with leading zeros, use the cons
**Example**
-Query the data from files named `file000`, `file001`, … , `file999`:
+Query the data from files named `file000`, `file001`, ... , `file999`:
``` sql
SELECT count(*)
diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md
index 38d77a98749..cbef80371a3 100644
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@@ -137,7 +137,7 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::
-Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
+Count the total number of rows in files named `file-000.csv`, `file-001.csv`, ...
, `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index cd1297504af..08fa7a1e603 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -57,7 +57,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, …), а также тернарного оператора `?:` ставятся пробелы. +**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, ...), а также тернарного оператора `?:` ставятся пробелы. ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -86,7 +86,7 @@ dst.ClickGoodEvent = click.GoodEvent; При необходимости, оператор может быть перенесён на новую строку. В этом случае, перед ним увеличивается отступ. -**11.** Унарные операторы `--`, `++`, `*`, `&`, … не отделяются от аргумента пробелом. +**11.** Унарные операторы `--`, `++`, `*`, `&`, ... не отделяются от аргумента пробелом. **12.** После запятой ставится пробел, а перед — нет. Аналогично для точки с запятой внутри выражения `for`. @@ -115,7 +115,7 @@ public: **16.** Если на весь файл один `namespace` и кроме него ничего существенного нет, то отступ внутри `namespace` не нужен. -**17.** Если блок для выражения `if`, `for`, `while`, … состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, … +**17.** Если блок для выражения `if`, `for`, `while`, ... состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, ... Если внутренний `statement` содержит фигурные скобки или `else`, то внешний блок следует писать в фигурных скобках. @@ -266,7 +266,7 @@ void executeQuery( Пример взят с ресурса http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/. -**7.** Нельзя писать мусорные комментарии (автор, дата создания…) в начале каждого файла. +**7.** Нельзя писать мусорные комментарии (автор, дата создания...) в начале каждого файла. **8.** Однострочные комментарии начинаются с трёх слешей: `///` , многострочные с `/**`. Такие комментарии считаются «документирующими». diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md index 72087b56652..cf43eef73e3 100644 --- a/docs/ru/engines/table-engines/integrations/hdfs.md +++ b/docs/ru/engines/table-engines/integrations/hdfs.md @@ -103,7 +103,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs **Example** -Создадим таблицу с именами `file000`, `file001`, … , `file999`: +Создадим таблицу с именами `file000`, `file001`, ... , `file999`: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 720aa589122..a1c69df4d0a 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -73,7 +73,7 @@ SELECT * FROM s3_engine_table LIMIT 2; **Пример подстановки 1** -Таблица содержит данные из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Таблица содержит данные из файлов с именами `file-000.csv`, `file-001.csv`, ... 
, `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md index 46597c94370..c3203804211 100644 --- a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -66,7 +66,7 @@ WHERE table = 'visits' └───────────┴───────────────────┴────────┘ ``` -Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md). +Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER ... PARTITION](../../../sql-reference/statements/alter/partition.md). Столбец `name` содержит названия кусков партиций. Значения из этого столбца можно использовать в запросах [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition). diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index faa492d4d85..49ba229b1d5 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -771,7 +771,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - В результате вставки (запрос `INSERT`). - В фоновых операциях слияний и [мутаций](../../../sql-reference/statements/alter/index.md#mutations). - При скачивании данных с другой реплики. -- В результате заморозки партиций [ALTER TABLE … FREEZE PARTITION](../../../engines/table-engines/mergetree-family/mergetree.md#alter_freeze-partition). +- В результате заморозки партиций [ALTER TABLE ... FREEZE PARTITION](../../../engines/table-engines/mergetree-family/mergetree.md#alter_freeze-partition). Во всех случаях, кроме мутаций и заморозки партиций, при записи куска выбирается том и диск в соответствии с указанной конфигурацией хранилища: @@ -781,7 +781,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' Мутации и запросы заморозки партиций в реализации используют [жесткие ссылки](https://ru.wikipedia.org/wiki/%D0%96%D1%91%D1%81%D1%82%D0%BA%D0%B0%D1%8F_%D1%81%D1%81%D1%8B%D0%BB%D0%BA%D0%B0). Жесткие ссылки между различными дисками не поддерживаются, поэтому в случае таких операций куски размещаются на тех же дисках, что и исходные. В фоне куски перемещаются между томами на основе информации о занятом месте (настройка `move_factor`) по порядку, в котором указаны тома в конфигурации. Данные никогда не перемещаются с последнего тома и на первый том. Следить за фоновыми перемещениями можно с помощью системных таблиц [system.part_log](../../../engines/table-engines/mergetree-family/mergetree.md#system_tables-part-log) (поле `type = MOVE_PART`) и [system.parts](../../../engines/table-engines/mergetree-family/mergetree.md#system_tables-parts) (поля `path` и `disk`). Также подробная информация о перемещениях доступна в логах сервера. -С помощью запроса [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../engines/table-engines/mergetree-family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. 
При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. +С помощью запроса [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](../../../engines/table-engines/mergetree-family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. Перемещения данных не взаимодействуют с репликацией данных, поэтому на разных репликах одной и той же таблицы могут быть указаны разные политики хранения. diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 881566e5f34..3d9737096f5 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -31,7 +31,7 @@ ClickHouse позволяет отправить на сервер данные, - **--format** - формат данных в файле. Если не указано - используется TabSeparated. Должен быть указан один из следующих параметров: -- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … +- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, ... - **--structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. Файлы, указанные в file, будут разобраны форматом, указанным в format, с использованием типов данных, указанных в types или structure. Таблица будет загружена на сервер, и доступна там в качестве временной таблицы с именем name. diff --git a/docs/ru/faq/general/olap.md b/docs/ru/faq/general/olap.md index c9021f7c92e..bcfe9663381 100644 --- a/docs/ru/faq/general/olap.md +++ b/docs/ru/faq/general/olap.md @@ -9,13 +9,13 @@ sidebar_position: 100 [OLAP](https://ru.wikipedia.org/wiki/OLAP) (OnLine Analytical Processing) переводится как обработка данных в реальном времени. Это широкий термин, который можно рассмотреть с двух сторон: с технической и с точки зрения бизнеса. Для самого общего понимания можно просто прочитать его с конца: **Processing** - Обрабатываются некие исходные данные… + Обрабатываются некие исходные данные... **Analytical** -: … чтобы получить какие-то аналитические отчеты или новые знания… +: ... чтобы получить какие-то аналитические отчеты или новые знания... **OnLine** -: … в реальном времени, практически без задержек на обработку. +: ... в реальном времени, практически без задержек на обработку. ## OLAP с точки зрения бизнеса {#olap-from-the-business-perspective} diff --git a/docs/ru/getting-started/example-datasets/nyc-taxi.md b/docs/ru/getting-started/example-datasets/nyc-taxi.md index 12d0c18c3a1..a42033e7d41 100644 --- a/docs/ru/getting-started/example-datasets/nyc-taxi.md +++ b/docs/ru/getting-started/example-datasets/nyc-taxi.md @@ -196,7 +196,7 @@ real 75m56.214s (Импорт данных напрямую из Postgres также возможен с использованием `COPY ... TO PROGRAM`.) -К сожалению, все поля, связанные с погодой (precipitation…average_wind_speed) заполнены NULL. Из-за этого мы исключим их из финального набора данных. 
+К сожалению, все поля, связанные с погодой (precipitation...average_wind_speed) заполнены NULL. Из-за этого мы исключим их из финального набора данных. Для начала мы создадим таблицу на одном сервере. Позже мы сделаем таблицу распределенной. diff --git a/docs/ru/index.md b/docs/ru/index.md index 29f2bbe07fb..d551d492af5 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -15,7 +15,7 @@ ClickHouse — столбцовая система управления база | #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | | #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | | #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| #N | ... | ... | ... | ... | ... | То есть, значения, относящиеся к одной строке, физически хранятся рядом. @@ -26,11 +26,11 @@ ClickHouse — столбцовая система управления база | Строка: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | В примерах изображён только порядок расположения данных. То есть значения из разных столбцов хранятся отдельно, а данные одного столбца — вместе. diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index d1d38a587c6..e82a5a008eb 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -260,7 +260,7 @@ FORMAT Null; Ограничивает количество строк в хэш-таблице, используемой при соединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). +Параметр применяется к операциям [SELECT... JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. @@ -277,7 +277,7 @@ FORMAT Null; Ограничивает размер (в байтах) хэш-таблицы, используемой при объединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). +Параметр применяется к операциям [SELECT... JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. 
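To show how these limits are typically applied together (a minimal sketch; the threshold and the companion `join_overflow_mode` value are example choices, not recommendations):

```sql
-- Cap the JOIN hash table at 1M rows; 'break' truncates instead of throwing
SET max_rows_in_join = 1000000;
SET join_overflow_mode = 'break';

SELECT t1.number
FROM numbers(10) AS t1
INNER JOIN numbers(10) AS t2 ON t1.number = t2.number;
```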
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 2b3607dcf08..3a70a0bac12 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1859,7 +1859,7 @@ SELECT * FROM test_table ## count_distinct_implementation {#settings-count_distinct_implementation} -Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count). +Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count). Возможные значения: diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index 6463f6bd95d..e6a61d9b381 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -82,7 +82,7 @@ FROM В этом случае необходимо помнить, что границы корзин гистограммы не известны. -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} Проверяет, содержит ли последовательность событий цепочку, которая соответствует указанному шаблону. @@ -172,7 +172,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} Вычисляет количество цепочек событий, соответствующих шаблону. Функция обнаруживает только непересекающиеся цепочки событий. Она начинает искать следующую цепочку только после того, как полностью совпала текущая цепочка событий. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md index fed0f8b328b..a0a430f7a68 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 ## quantiles {#quantiles} -Синтаксис: `quantiles(level1, level2, …)(x)` +Синтаксис: `quantiles(level1, level2, ...)(x)` Все функции для вычисления квантилей имеют соответствующие функции для вычисления нескольких квантилей: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. Эти функции вычисляют все квантили указанных уровней в один проход и возвращают массив с вычисленными значениями. diff --git a/docs/ru/sql-reference/data-types/aggregatefunction.md b/docs/ru/sql-reference/data-types/aggregatefunction.md index e42b467e4af..0481151c7e4 100644 --- a/docs/ru/sql-reference/data-types/aggregatefunction.md +++ b/docs/ru/sql-reference/data-types/aggregatefunction.md @@ -6,9 +6,9 @@ sidebar_label: AggregateFunction # AggregateFunction {#data-type-aggregatefunction} -Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). 
Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. +Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(...), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. -`AggregateFunction(name, types_of_arguments…)` — параметрический тип данных. +`AggregateFunction(name, types_of_arguments...)` — параметрический тип данных. **Параметры** diff --git a/docs/ru/sql-reference/data-types/fixedstring.md b/docs/ru/sql-reference/data-types/fixedstring.md index d7a4e865903..56a5632f88d 100644 --- a/docs/ru/sql-reference/data-types/fixedstring.md +++ b/docs/ru/sql-reference/data-types/fixedstring.md @@ -21,8 +21,8 @@ sidebar_label: FixedString(N) Примеры значений, которые можно эффективно хранить в столбцах типа `FixedString`: - Двоичное представление IP-адреса (`FixedString(16)` для IPv6). -- Коды языков (ru_RU, en_US … ). -- Коды валют (USD, RUB … ). +- Коды языков (ru_RU, en_US ... ). +- Коды валют (USD, RUB ... ). - Двоичное представление хэшей (`FixedString(16)` для MD5, `FixedString(32)` для SHA256). Для хранения значений UUID используйте тип данных [UUID](uuid.md). diff --git a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md index 4ec8333d563..8fd293a0415 100644 --- a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md @@ -3,7 +3,7 @@ slug: /ru/sql-reference/data-types/nested-data-structures/nested --- # Nested {#nested} -## Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +## Nested(Name1 Type1, Name2 Type2, ...) {#nestedname1-type1-name2-type2} Вложенная структура данных - это как будто вложенная таблица. Параметры вложенной структуры данных - имена и типы столбцов, указываются так же, как у запроса CREATE. Каждой строке таблицы может соответствовать произвольное количество строк вложенной структуры данных. diff --git a/docs/ru/sql-reference/data-types/tuple.md b/docs/ru/sql-reference/data-types/tuple.md index 8953134d154..9d86c26c563 100644 --- a/docs/ru/sql-reference/data-types/tuple.md +++ b/docs/ru/sql-reference/data-types/tuple.md @@ -4,7 +4,7 @@ sidebar_position: 54 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(T1, T2, …) {#tuplet1-t2} +# Tuple(T1, T2, ...) {#tuplet1-t2} Кортеж из элементов любого [типа](index.md#data_types). Элементы кортежа могут быть одного или разных типов. diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 1f06bdf264a..825e3f06be2 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -161,7 +161,7 @@ SELECT range(5), range(1, 5), range(1, 5, 2); ``` -## array(x1, …), оператор \[x1, …\] {#arrayx1-operator-x1} +## array(x1, ...), оператор \[x1, ...\] {#arrayx1-operator-x1} Создаёт массив из аргументов функции. 
Аргументы должны быть константами и иметь типы, для которых есть наименьший общий тип. Должен быть передан хотя бы один аргумент, так как иначе непонятно, какого типа создавать массив. То есть, с помощью этой функции невозможно создать пустой массив (для этого используйте функции emptyArray\*, описанные выше). @@ -308,7 +308,7 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Элементы, равные `NULL`, обрабатываются как обычные значения. -## arrayCount(\[func,\] arr1, …) {#array-count} +## arrayCount(\[func,\] arr1, ...) {#array-count} Возвращает количество элементов массива `arr`, для которых функция `func` возвращает не 0. Если `func` не указана - возвращает количество ненулевых элементов массива. @@ -335,7 +335,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) {#array_functions-arrayenumerate} -Возвращает массив \[1, 2, 3, …, length(arr)\] +Возвращает массив \[1, 2, 3, ..., length(arr)\] Эта функция обычно используется совместно с ARRAY JOIN. Она позволяет, после применения ARRAY JOIN, посчитать что-либо только один раз для каждого массива. Пример: @@ -375,7 +375,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) Также эта функция может быть использована в функциях высшего порядка. Например, с её помощью можно достать индексы массива для элементов, удовлетворяющих некоторому условию. -## arrayEnumerateUniq(arr, …) {#arrayenumerateuniqarr} +## arrayEnumerateUniq(arr, ...) {#arrayenumerateuniqarr} Возвращает массив, такого же размера, как исходный, где для каждого элемента указано, какой он по счету среди элементов с таким же значением. Например: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\]. @@ -597,7 +597,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res; Элементы массива равные `NULL` обрабатываются как обычные значения. -## arraySort(\[func,\] arr, …) {#array_functions-sort} +## arraySort(\[func,\] arr, ...) {#array_functions-sort} Возвращает массив `arr`, отсортированный в восходящем порядке. Если задана функция `func`, то порядок сортировки определяется результатом применения этой функции на элементы массива `arr`. Если `func` принимает несколько аргументов, то в функцию `arraySort` нужно передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания `arraySort`. @@ -698,11 +698,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). ::: -## arrayPartialSort(\[func,\] limit, arr, …) {#array_functions-sort} +## arrayPartialSort(\[func,\] limit, arr, ...) {#array_functions-sort} То же, что и `arraySort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в возрастающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#array_functions-reverse-sort} Возвращает массив `arr`, отсортированный в нисходящем порядке. Если указана функция `func`, то массив `arr` сначала сортируется в порядке, который определяется функцией `func`, а затем отсортированный массив переворачивается. 
Если функция `func` принимает несколько аргументов, то в функцию `arrayReverseSort` необходимо передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания функции `arrayReverseSort`. @@ -803,11 +803,11 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayPartialReverseSort(\[func,\] limit, arr, …) {#array_functions-sort} +## arrayPartialReverseSort(\[func,\] limit, arr, ...) {#array_functions-sort} То же, что и `arrayReverseSort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в убывающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. -## arrayUniq(arr, …) {#array-functions-arrayuniq} +## arrayUniq(arr, ...) {#array-functions-arrayuniq} Если передан один аргумент, считает количество разных элементов в массиве. Если передано несколько аргументов, считает количество разных кортежей из элементов на соответствующих позициях в нескольких массивах. @@ -1174,7 +1174,7 @@ SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]); └──────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) {#array-map} +## arrayMap(func, arr1, ...) {#array-map} Возвращает массив, полученный на основе результатов применения функции `func` к каждому элементу массива `arr`. @@ -1204,7 +1204,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res; Функция `arrayMap` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFilter(func, arr1, …) {#array-filter} +## arrayFilter(func, arr1, ...) {#array-filter} Возвращает массив, содержащий только те элементы массива `arr1`, для которых функция `func` возвращает не 0. @@ -1237,7 +1237,7 @@ SELECT Функция `arrayFilter` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFill(func, arr1, …) {#array-fill} +## arrayFill(func, arr1, ...) {#array-fill} Перебирает `arr1` от первого элемента к последнему и заменяет `arr1[i]` на `arr1[i - 1]`, если `func` вернула 0. Первый элемент `arr1` остаётся неизменным. @@ -1255,7 +1255,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Функция `arrayFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayReverseFill(func, arr1, …) {#array-reverse-fill} +## arrayReverseFill(func, arr1, ...) {#array-reverse-fill} Перебирает `arr1` от последнего элемента к первому и заменяет `arr1[i]` на `arr1[i + 1]`, если `func` вернула 0. Последний элемент `arr1` остаётся неизменным. @@ -1273,7 +1273,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Функция `arrayReverseFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arraySplit(func, arr1, …) {#array-split} +## arraySplit(func, arr1, ...) {#array-split} Разделяет массив `arr1` на несколько. 
Если `func` возвращает не 0, то массив разделяется, а элемент помещается в левую часть. Массив не разбивается по первому элементу. @@ -1291,7 +1291,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Функция `arraySplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayReverseSplit(func, arr1, …) {#array-reverse-split} +## arrayReverseSplit(func, arr1, ...) {#array-reverse-split} Разделяет массив `arr1` на несколько. Если `func` возвращает не 0, то массив разделяется, а элемент помещается в правую часть. Массив не разбивается по последнему элементу. @@ -1309,25 +1309,25 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Функция `arrayReverseSplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +## arrayExists(\[func,\] arr1, ...) {#arrayexistsfunc-arr1} Возвращает 1, если существует хотя бы один элемент массива `arr`, для которого функция func возвращает не 0. Иначе возвращает 0. Функция `arrayExists` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. -## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +## arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} Возвращает 1, если для всех элементов массива `arr`, функция `func` возвращает не 0. Иначе возвращает 0. Функция `arrayAll` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. -## arrayFirst(func, arr1, …) {#array-first} +## arrayFirst(func, arr1, ...) {#array-first} Возвращает первый элемент массива `arr1`, для которого функция func возвращает не 0. Функция `arrayFirst` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFirstIndex(func, arr1, …) {#array-first-index} +## arrayFirstIndex(func, arr1, ...) {#array-first-index} Возвращает индекс первого элемента массива `arr1`, для которого функция func возвращает не 0. @@ -1599,7 +1599,7 @@ SELECT arraySum(x -> x*x, [2, 3]) AS res; └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +## arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 56ae4359bf1..bcc5f807c32 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -559,7 +559,7 @@ SELECT Описание режимов (mode): -| Mode | Первый день недели | Диапазон | Неделя 1 это первая неделя … | +| Mode | Первый день недели | Диапазон | Неделя 1 это первая неделя ... 
| | ----------- | -------- | -------- | ------------------ | |0|Воскресенье|0-53|с воскресеньем в этом году |1|Понедельник|0-53|с 4-мя или более днями в этом году diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 123f40ce05d..18f625bf80f 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -88,7 +88,7 @@ SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 SELECT isValidJSON('not a json') = 0 ``` -## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys} +## JSONHas(json\[, indices_or_keys\]...) {#jsonhasjson-indices-or-keys} Если значение существует в документе JSON, то возвращается `1`. @@ -121,7 +121,7 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' ``` -## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys} +## JSONLength(json\[, indices_or_keys\]...) {#jsonlengthjson-indices-or-keys} Возвращает длину массива JSON или объекта JSON. @@ -134,7 +134,7 @@ SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 ``` -## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys} +## JSONType(json\[, indices_or_keys\]...) {#jsontypejson-indices-or-keys} Возвращает тип значения JSON. @@ -148,13 +148,13 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' ``` -## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys} +## JSONExtractUInt(json\[, indices_or_keys\]...) {#jsonextractuintjson-indices-or-keys} -## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys} +## JSONExtractInt(json\[, indices_or_keys\]...) {#jsonextractintjson-indices-or-keys} -## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys} +## JSONExtractFloat(json\[, indices_or_keys\]...) {#jsonextractfloatjson-indices-or-keys} -## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys} +## JSONExtractBool(json\[, indices_or_keys\]...) {#jsonextractbooljson-indices-or-keys} Парсит JSON и извлекает значение. Эти функции аналогичны функциям `visitParam`. @@ -168,7 +168,7 @@ SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200 SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 ``` -## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys} +## JSONExtractString(json\[, indices_or_keys\]...) {#jsonextractstringjson-indices-or-keys} Парсит JSON и извлекает строку. Эта функция аналогична функции `visitParamExtractString`. @@ -186,7 +186,7 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = '' SELECT JSONExtractString('{"abc":"hello}', 'abc') = '' ``` -## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type} +## JSONExtract(json\[, indices_or_keys...\], Return_type) {#jsonextractjson-indices-or-keys-return-type} Парсит JSON и извлекает значение с заданным типом данных. 
@@ -207,7 +207,7 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' ``` -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} Разбор пар ключ-значение из JSON, где значение имеет тип данных ClickHouse. @@ -255,7 +255,7 @@ text └────────────────────────────────────────────────────────────┘ ``` -## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys} +## JSONExtractRaw(json\[, indices_or_keys\]...) {#jsonextractrawjson-indices-or-keys} Возвращает часть JSON в виде строки, содержащей неразобранную подстроку. @@ -267,7 +267,7 @@ text SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'; ``` -## JSONExtractArrayRaw(json\[, indices_or_keys\]…) {#jsonextractarrayrawjson-indices-or-keys} +## JSONExtractArrayRaw(json\[, indices_or_keys\]...) {#jsonextractarrayrawjson-indices-or-keys} Возвращает массив из элементов JSON массива, каждый из которых представлен в виде строки с неразобранными подстроками из JSON. diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 835aed934d5..f7637cfa3f7 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -286,7 +286,7 @@ SELECT byteSize(NULL, 1, 0.3, ''); Превращает константу в полноценный столбец, содержащий только одно значение. В ClickHouse полноценные столбцы и константы представлены в памяти по-разному. Функции по-разному работают для аргументов-констант и обычных аргументов (выполняется разный код), хотя результат почти всегда должен быть одинаковым. Эта функция предназначена для отладки такого поведения. -## ignore(…) {#ignore} +## ignore(...) {#ignore} Принимает любые аргументы, в т.ч. `NULL`, всегда возвращает 0. При этом, аргумент всё равно вычисляется. Это может использоваться для бенчмарков. diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index eeb5752c626..fc258f7b4cf 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -358,7 +358,7 @@ SELECT repeat('abc', 10); Разворачивает последовательность кодовых точек Unicode, при допущении, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Иначе — что-то делает (не кидает исключение). -## format(pattern, s0, s1, …) {#format} +## format(pattern, s0, s1, ...) {#format} Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` — упрощенная версия шаблона в языке Python. Шаблон содержит «заменяющие поля», которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{ '{{' }}` или `{{ '}}' }}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). 
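For example (this mirrors the semantics described above: fields are numbered from zero and may repeat):

```sql
SELECT format('{1} {0} {1}', 'World', 'Hello') AS s; -- 'Hello World Hello'
```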
diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 4f9ae4428a4..53da9a6e791 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -311,19 +311,19 @@ Result: Смотрите `multiSearchAllPositions`. -## multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} +## multiSearchFirstPosition(haystack, \[needle1, needle2, ..., needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} Так же, как и `position`, только возвращает оффсет первого вхождения любого из needles. Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchFirstPositionCaseInsensitive, multiSearchFirstPositionUTF8, multiSearchFirstPositionCaseInsensitiveUTF8`. -## multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} +## multiSearchFirstIndex(haystack, \[needle1, needle2, ..., needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} Возвращает индекс `i` (нумерация с единицы) первой найденной строки needlei в строке `haystack` и 0 иначе. Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchFirstIndexCaseInsensitive, multiSearchFirstIndexUTF8, multiSearchFirstIndexCaseInsensitiveUTF8`. -## multiSearchAny(haystack, \[needle1, needle2, …, needlen\]) {#function-multisearchany} +## multiSearchAny(haystack, \[needle1, needle2, ..., needlen\]) {#function-multisearchany} Возвращает 1, если хотя бы одна подстрока needlei нашлась в строке `haystack` и 0 иначе. @@ -343,30 +343,30 @@ Result: Регулярное выражение работает со строкой как с набором байт. Регулярное выражение не может содержать нулевые байты. Для шаблонов на поиск подстроки в строке, лучше используйте LIKE или position, так как они работают существенно быстрее. -## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn} +## multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn} То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется библиотека [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. :::note Примечание Длина любой строки из `haystack` должна быть меньше 232 байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. ::: -## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} +## multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, только возвращает любой индекс подходящего регулярного выражения. -## multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchallindiceshaystack-pattern1-pattern2-patternn} +## multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchallindiceshaystack-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке. 
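To make the distinction concrete: the `multiSearch*` family above looks for literal substrings, while the `multiMatch*` family matches regular expressions. A small sketch consistent with the documented semantics:

``` sql
SELECT multiSearchAny('Hello, World!', ['say', 'Hello']);         -- 1: the literal needle 'Hello' occurs
SELECT multiSearchFirstIndex('Hello, World!', ['say', 'Hello']);  -- 2: 1-based index of the first needle found
SELECT multiMatchAnyIndex('Hello, World!', ['^say', 'Hel+o']);    -- 2: only the second regexp matches
```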
-## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с non-fuzzy вариантами. -## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} То же, что и `multiFuzzyMatchAny`, только возвращает любой индекс подходящего регулярного выражения в пределах константного редакционного расстояния. -## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchallindiceshaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchallindiceshaystack-distance-pattern1-pattern2-patternn} То же, что и `multiFuzzyMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке в пределах константного редакционного расстояния. diff --git a/docs/ru/sql-reference/functions/tuple-functions.md b/docs/ru/sql-reference/functions/tuple-functions.md index c702e5d00b1..70ae44aa627 100644 --- a/docs/ru/sql-reference/functions/tuple-functions.md +++ b/docs/ru/sql-reference/functions/tuple-functions.md @@ -9,15 +9,15 @@ sidebar_label: Функции для работы с кортежами ## tuple {#tuple} Функция, позволяющая сгруппировать несколько столбцов. -Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит. +Для столбцов, имеющих типы T1, T2, ... возвращает кортеж типа Tuple(T1, T2, ...), содержащий эти столбцы. Выполнение функции ничего не стоит. Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу. -С помощью функции реализуется оператор `(x, y, …)`. +С помощью функции реализуется оператор `(x, y, ...)`. **Синтаксис** ``` sql -tuple(x, y, …) +tuple(x, y, ...) ``` ## tupleElement {#tupleelement} diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 3c6e6151ef8..087891f4347 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -14,7 +14,7 @@ sidebar_label: "Функции для работы с URL" ### protocol {#protocol} -Возвращает протокол. Примеры: http, ftp, mailto, magnet… +Возвращает протокол. Примеры: http, ftp, mailto, magnet... 
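A two-line sketch of the URL accessors touched here (the URL itself is just an example):

``` sql
SELECT protocol('https://clickhouse.com/docs');  -- 'https'
SELECT domain('https://clickhouse.com/docs');    -- 'clickhouse.com'
```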
### domain {#domain} diff --git a/docs/ru/sql-reference/statements/alter/comment.md b/docs/ru/sql-reference/statements/alter/comment.md index 727af15d03e..f841c8540f3 100644 --- a/docs/ru/sql-reference/statements/alter/comment.md +++ b/docs/ru/sql-reference/statements/alter/comment.md @@ -4,7 +4,7 @@ sidebar_position: 51 sidebar_label: COMMENT --- -# ALTER TABLE … MODIFY COMMENT {#alter-modify-comment} +# ALTER TABLE ... MODIFY COMMENT {#alter-modify-comment} Добавляет, изменяет или удаляет комментарий к таблице, независимо от того, был ли он установлен раньше или нет. Изменение комментария отражается как в системной таблице [system.tables](../../../operations/system-tables/tables.md), так и в результате выполнения запроса `SHOW CREATE TABLE`. diff --git a/docs/ru/sql-reference/statements/alter/delete.md b/docs/ru/sql-reference/statements/alter/delete.md index dc968a17349..c91a79f5cdd 100644 --- a/docs/ru/sql-reference/statements/alter/delete.md +++ b/docs/ru/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE {#alter-mutations} +# ALTER TABLE ... DELETE {#alter-mutations} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/ru/sql-reference/statements/alter/index.md b/docs/ru/sql-reference/statements/alter/index.md index 07f5ff0a298..e8b8af39e11 100644 --- a/docs/ru/sql-reference/statements/alter/index.md +++ b/docs/ru/sql-reference/statements/alter/index.md @@ -46,7 +46,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ### Мутации {#mutations} -Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов [ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md) и [ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md), рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. Поддержана для движков таблиц семейства [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md), в том числе для движков с репликацией. +Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов [ALTER TABLE ... DELETE](../../../sql-reference/statements/alter/delete.md) и [ALTER TABLE ... UPDATE](../../../sql-reference/statements/alter/update.md), рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. Поддержана для движков таблиц семейства [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md), в том числе для движков с репликацией. Конвертировать существующие таблицы для работы с мутациями не нужно. Но после применения первой мутации формат данных таблицы становится несовместимым с предыдущими версиями и откатиться на предыдущую версию уже не получится. diff --git a/docs/ru/sql-reference/statements/alter/update.md b/docs/ru/sql-reference/statements/alter/update.md index b2032ac77d1..01574a8a9b7 100644 --- a/docs/ru/sql-reference/statements/alter/update.md +++ b/docs/ru/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE {#alter-table-update-statements} +# ALTER TABLE ... UPDATE {#alter-table-update-statements} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] 
WHERE filter_expr diff --git a/docs/ru/sql-reference/statements/alter/view.md b/docs/ru/sql-reference/statements/alter/view.md index e6f6730ff99..53e295f6bbe 100644 --- a/docs/ru/sql-reference/statements/alter/view.md +++ b/docs/ru/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# Выражение ALTER TABLE … MODIFY QUERY {#alter-modify-query} +# Выражение ALTER TABLE ... MODIFY QUERY {#alter-modify-query} -Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. +Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE ... MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. Если при создании материализованного представления использовалась конструкция `TO [db.]name`, то для изменения отсоедините представление с помощью [DETACH](../detach.md), измените таблицу с помощью [ALTER TABLE](index.md), а затем снова присоедините запрос с помощью [ATTACH](../attach.md). diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 032bdc6e6d4..8fa30446bb3 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -60,7 +60,7 @@ AS SELECT ... Если указано `POPULATE`, то при создании представления в него будут добавлены данные, уже содержащиеся в исходной таблице, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Если `POPULATE` не указано, представление будет содержать только данные, добавленные в таблицу после создания представления. Использовать `POPULATE` не рекомендуется, так как в представление не попадут данные, добавляемые в таблицу во время создания представления. -Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. +Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`... Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. Выполнение запросов [ALTER](../../../sql-reference/statements/alter/view.md) над материализованными представлениями имеет свои особенности, поэтому эти запросы могут быть неудобными для использования. 
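The per-block aggregation caveat described above is easiest to see with a summing target engine. A hypothetical sketch (the table and view names are invented for illustration):

``` sql
CREATE TABLE hits (ts DateTime, user_id UInt64) ENGINE = MergeTree ORDER BY ts;

-- Each inserted block is aggregated independently, so parts of hits_per_day
-- may hold partial counts; SummingMergeTree re-aggregates them on merge.
CREATE MATERIALIZED VIEW hits_per_day
ENGINE = SummingMergeTree() ORDER BY day
AS SELECT toDate(ts) AS day, count() AS cnt
FROM hits GROUP BY day;

-- Reads should still sum, since unmerged parts can carry partial counts:
SELECT day, sum(cnt) AS cnt FROM hits_per_day GROUP BY day;
```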
Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 747e36b8809..309d4852b11 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -73,7 +73,7 @@ INSERT INTO insert_select_testtable VALUES (1, DEFAULT, 1) ; INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -Например, следующий формат запроса идентичен базовому варианту INSERT … VALUES: +Например, следующий формат запроса идентичен базовому варианту INSERT ... VALUES: ``` sql INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 5331cf00728..546a674d41a 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -116,7 +116,7 @@ SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UIn **Пример** -Запрос данных из файлов с именами `file000`, `file001`, … , `file999`: +Запрос данных из файлов с именами `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index fe40cb0c507..2847a95bf19 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -108,7 +108,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. ::: -Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index 7afcc07c6fb..c91d8bcf4d1 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -190,7 +190,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" - 如果在获取系统数据时发生了zookeeper异常。副本,将其显示在单独的列中。 这实现了 [#9137](https://github.com/ClickHouse/ClickHouse/issues/9137) [#9138](https://github.com/ClickHouse/ClickHouse/pull/9138) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 原子删除destroy上的MergeTree数据部分。 [#8402](https://github.com/ClickHouse/ClickHouse/pull/8402) ([Vladimir Chebotarev](https://github.com/excitoon)) - 支持分布式表的行级安全性。 [#8926](https://github.com/ClickHouse/ClickHouse/pull/8926) ([伊万](https://github.com/abyss7)) -- Now we recognize suffix (like KB, KiB…) in settings values. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- Now we recognize suffix (like KB, KiB...) in settings values. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) - 在构建大型连接的结果时防止内存不足。 [#8637](https://github.com/ClickHouse/ClickHouse/pull/8637) ([Artem Zuikov](https://github.com/4ertus2)) - 在交互模式下为建议添加群集名称 `clickhouse-client`. 
[#8709](https://github.com/ClickHouse/ClickHouse/pull/8709) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - Initialize query profiler for all threads in a group, e.g. it allows to fully profile insert-queries [#8820](https://github.com/ClickHouse/ClickHouse/pull/8820) ([伊万](https://github.com/abyss7)) @@ -523,7 +523,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" - 现在后台在磁盘之间移动,运行它的seprate线程池。 [#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon)) - `SYSTEM RELOAD DICTIONARY` 现在同步执行。 [#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) - 堆栈跟踪现在显示物理地址(对象文件中的偏移量),而不是虚拟内存地址(加载对象文件的位置)。 这允许使用 `addr2line` 当二进制独立于位置并且ASLR处于活动状态时。 这修复 [#8360](https://github.com/ClickHouse/ClickHouse/issues/8360). [#8387](https://github.com/ClickHouse/ClickHouse/pull/8387) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) -- 支持行级安全筛选器的新语法: `
`. 修复 [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([伊万](https://github.com/abyss7)) +- 支持行级安全筛选器的新语法: `...
`. 修复 [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([伊万](https://github.com/abyss7)) - 现在 `cityHash` 功能可以与工作 `Decimal` 和 `UUID` 类型。 修复 [#5184](https://github.com/ClickHouse/ClickHouse/issues/5184). [#7693](https://github.com/ClickHouse/ClickHouse/pull/7693) ([米哈伊尔\*科罗托夫](https://github.com/millb)) - 从系统日志中删除了固定的索引粒度(它是1024),因为它在实现自适应粒度之后已经过时。 [#7698](https://github.com/ClickHouse/ClickHouse/pull/7698) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 当ClickHouse在没有SSL的情况下编译时,启用MySQL兼容服务器。 [#7852](https://github.com/ClickHouse/ClickHouse/pull/7852) ([尤里\*巴拉诺夫](https://github.com/yurriy)) diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index c0a08291e02..724b22ad461 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -53,7 +53,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,…)和三元运算符 `?:` 周围添加空格。 +**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,...)和三元运算符 `?:` 周围添加空格。 ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -82,7 +82,7 @@ dst.ClickGoodEvent = click.GoodEvent; 如有必要,运算符可以包裹到下一行。 在这种情况下,它前面的偏移量增加。 -**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, …) 和参数。 +**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, ...) 和参数。 **12.** 在逗号后面加一个空格,而不是在之前。同样的规则也适合 `for` 循环中的分号。 @@ -111,7 +111,7 @@ public: **16.** 如果对整个文件使用相同的 `namespace`,并且没有其他重要的东西,则 `namespace` 中不需要偏移量。 -**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, … +**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, ... 但是如果内部 `statement` 包含大括号或 `else`,则外部块应该用大括号括起来。 @@ -262,7 +262,7 @@ void executeQuery( 这个示例来源于 http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/。 -**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期…)。 +**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期...)。 **8.** 单行注释用三个斜杆: `///` ,多行注释以 `/**`开始。 这些注释会当做文档。 diff --git a/docs/zh/engines/table-engines/integrations/hdfs.md b/docs/zh/engines/table-engines/integrations/hdfs.md index 55648afe407..be673b6ce92 100644 --- a/docs/zh/engines/table-engines/integrations/hdfs.md +++ b/docs/zh/engines/table-engines/integrations/hdfs.md @@ -103,7 +103,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs **示例** -创建具有名为文件的表 `file000`, `file001`, … , `file999`: +创建具有名为文件的表 `file000`, `file001`, ... , `file999`: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/zh/engines/table-engines/integrations/s3.md b/docs/zh/engines/table-engines/integrations/s3.md index f2585decabf..f18814675c3 100644 --- a/docs/zh/engines/table-engines/integrations/s3.md +++ b/docs/zh/engines/table-engines/integrations/s3.md @@ -109,7 +109,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https: **示例** -使用文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`来创建表: +使用文件`file-000.csv`, `file-001.csv`, ... , `file-999.csv`来创建表: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); @@ -202,7 +202,7 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p !!! warning "Warning" 如果文件列表中包含有从0开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`. -4.
从文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`创建表: +4. 从文件`file-000.csv`, `file-001.csv`, ... , `file-999.csv`创建表: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md index 4fecf4e5669..e283a4c7510 100644 --- a/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -59,7 +59,7 @@ WHERE table = 'visits' └───────────┴────────────────┴────────┘ ``` -`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER … PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。 +`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER ... PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。 `name` 列为分区中数据片段的名称。在 [ALTER ATTACH PART](#alter_attach-partition) 语句中你可以使用此列值中来指定片段名称。 diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index bfa69338657..67bd681269b 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -702,7 +702,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - 插入(`INSERT`查询) - 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations) - 从另一个副本下载 -- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 +- [ALTER TABLE ... FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上: @@ -713,7 +713,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' 在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。 -用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 +用户可以通过 [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。 diff --git a/docs/zh/engines/table-engines/special/external-data.md b/docs/zh/engines/table-engines/special/external-data.md index 688e25402ab..06c6331b4f3 100644 --- a/docs/zh/engines/table-engines/special/external-data.md +++ b/docs/zh/engines/table-engines/special/external-data.md @@ -26,7 +26,7 @@ ClickHouse 允许向服务器发送处理查询所需的数据以及 SELECT 查 以下的参数是可选的:**–name** – 表的名称,如果省略,则采用 _data。 **–format** – 文件中的数据格式。 如果省略,则使用 TabSeparated。 -以下的参数必选一个:**–types** – 逗号分隔列类型的列表。例如:`UInt64,String`。列将被命名为 _1,_2,… +以下的参数必选一个:**–types** – 逗号分隔列类型的列表。例如:`UInt64,String`。列将被命名为 _1,_2,... 
**–structure**– 表结构的格式 `UserID UInt64`,`URL String`。定义列的名字以及类型。 在 «file» 中指定的文件将由 «format» 中指定的格式解析,使用在 «types» 或 «structure» 中指定的数据类型。该表将被上传到服务器,并在作为名称为 «name»临时表。 diff --git a/docs/zh/faq/general/olap.md b/docs/zh/faq/general/olap.md index b014419578b..c4b36b138fa 100644 --- a/docs/zh/faq/general/olap.md +++ b/docs/zh/faq/general/olap.md @@ -10,13 +10,13 @@ sidebar_position: 100 [OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) stands for Online Analytical Processing. It is a broad term that can be looked at from two perspectives: technical and business. But at the very high level, you can just read these words backward: Processing -: Some source data is processed… +: Some source data is processed... Analytical -: …to produce some analytical reports and insights… +: ...to produce some analytical reports and insights... Online -: …in real-time. +: ...in real-time. ## OLAP from the Business Perspective {#olap-from-the-business-perspective} diff --git a/docs/zh/getting-started/example-datasets/nyc-taxi.md b/docs/zh/getting-started/example-datasets/nyc-taxi.md index 9c487140df3..ceeb6fbb9e0 100644 --- a/docs/zh/getting-started/example-datasets/nyc-taxi.md +++ b/docs/zh/getting-started/example-datasets/nyc-taxi.md @@ -196,7 +196,7 @@ real 75m56.214s (也可以直接使用`COPY ... TO PROGRAM`从Postgres中导入数据) -数据中所有与天气相关的字段(precipitation……average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们 +数据中所有与天气相关的字段(precipitation...average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们 首先,我们使用单台服务器创建表,后面我们将在多台节点上创建这些表。 diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index ecfdcddbbe2..7d4c299b919 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -212,7 +212,7 @@ ORDER BY year └──────┴─────────┴───────────────────────────────────────────────────────┘ ``` -2020 年房价出事了!但这并不令人意外…… +2020 年房价出事了!但这并不令人意外... ### 查询 3. 最昂贵的社区 {#most-expensive-neighborhoods} diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 758992e4084..975d5eb764c 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -371,7 +371,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID :::note - 最后一个索引条目(上图中的“mark 1082”)存储了上图中颗粒1082的主键列的最大值。 -- 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411…”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411…”,这两个值来自不同的行。 +- 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411...”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411...”,这两个值来自不同的行。 - 主索引文件完全加载到主内存中。如果文件大于可用的空闲内存空间,则ClickHouse将发生错误。 ::: diff --git a/docs/zh/index.md b/docs/zh/index.md index fab00dbcd1b..ec4b6dce1f8 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -16,7 +16,7 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) | #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | | #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | | #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| #N | ... | ... | ... | ... | ... 
| 处于同一行中的数据总是被物理的存储在一起。 @@ -26,11 +26,11 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) | Row: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | 这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。 diff --git a/docs/zh/operations/settings/query-complexity.md b/docs/zh/operations/settings/query-complexity.md index 124d5fa5d1a..b1b5ca75018 100644 --- a/docs/zh/operations/settings/query-complexity.md +++ b/docs/zh/operations/settings/query-complexity.md @@ -196,7 +196,7 @@ Restrictions on the «maximum amount of something» can take the value 0, which Limits the number of rows in the hash table that is used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. If a query contains multiple joins, ClickHouse checks this setting for every intermediate result. @@ -213,7 +213,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index c3b4194ed44..5e59196f56c 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1002,7 +1002,7 @@ ClickHouse生成异常 ## count_distinct_implementation {#settings-count_distinct_implementation} -指定其中的 `uniq*` 函数应用于执行 [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) 建筑。 +指定其中的 `uniq*` 函数应用于执行 [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) 建筑。 可能的值: diff --git a/docs/zh/operations/system-tables/dictionaries.md b/docs/zh/operations/system-tables/dictionaries.md index 0cf91e45e86..c7b1bdd04be 100644 --- a/docs/zh/operations/system-tables/dictionaries.md +++ b/docs/zh/operations/system-tables/dictionaries.md @@ -21,7 +21,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. - `origin` ([字符串](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. - `type` ([字符串](../../sql-reference/data-types/string.md)) — Type of dictionary allocation. 
[在内存中存储字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). -- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”. +- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, ..., type n)”. - `attribute.names` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Array of [属性名称](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 由字典提供。 - `attribute.types` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Corresponding array of [属性类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 这是由字典提供。 - `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. diff --git a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md index cb1dcc35f5c..27d3375aebb 100644 --- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md @@ -80,7 +80,7 @@ FROM 在这种情况下,您应该记住您不知道直方图bin边界。 -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} 检查序列是否包含与模式匹配的事件链。 @@ -167,7 +167,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} 计算与模式匹配的事件链的数量。该函数搜索不重叠的事件链。当前链匹配后,它开始搜索下一个链。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md b/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md index 4dce65af1ed..253eb9ef82d 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 **语法** ``` sql -quantiles(level1, level2, …)(x) +quantiles(level1, level2, ...)(x) ``` 所有分位数函数(quantile)也有相应的分位数(quantiles)函数: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`。 这些函数一次计算所列的级别的所有分位数, 并返回结果值的数组。 diff --git a/docs/zh/sql-reference/data-types/aggregatefunction.md b/docs/zh/sql-reference/data-types/aggregatefunction.md index e8f28b367a5..80648eb165b 100644 --- a/docs/zh/sql-reference/data-types/aggregatefunction.md +++ b/docs/zh/sql-reference/data-types/aggregatefunction.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/aggregatefunction --- -# AggregateFunction(name, types_of_arguments…) {#data-type-aggregatefunction} +# AggregateFunction(name, types_of_arguments...) 
{#data-type-aggregatefunction} 聚合函数的中间状态,可以通过聚合函数名称加`-State`后缀的形式得到它。与此同时,当您需要访问该类型的最终状态数据时,您需要以相同的聚合函数名加`-Merge`后缀的形式来得到最终状态数据。 diff --git a/docs/zh/sql-reference/data-types/domains/index.md b/docs/zh/sql-reference/data-types/domains/index.md index c123b10f6fe..9f12018732b 100644 --- a/docs/zh/sql-reference/data-types/domains/index.md +++ b/docs/zh/sql-reference/data-types/domains/index.md @@ -19,9 +19,9 @@ Domain类型是特定实现的类型,它总是与某个现存的基础类型 ### Domains的额外特性 {#domainsde-e-wai-te-xing} - 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 -- 在INSERT INTO domain_table(domain_column) VALUES(…)中输入数据总是以更人性化的格式进行输入 +- 在INSERT INTO domain_table(domain_column) VALUES(...)中输入数据总是以更人性化的格式进行输入 - 在SELECT domain_column FROM domain_table中数据总是以更人性化的格式输出 -- 在INSERT INTO domain_table FORMAT CSV …中,实现外部源数据以更人性化的格式载入 +- 在INSERT INTO domain_table FORMAT CSV ...中,实现外部源数据以更人性化的格式载入 ### Domains类型的限制 {#domainslei-xing-de-xian-zhi} diff --git a/docs/zh/sql-reference/data-types/fixedstring.md b/docs/zh/sql-reference/data-types/fixedstring.md index 633307938a9..d454e935fe7 100644 --- a/docs/zh/sql-reference/data-types/fixedstring.md +++ b/docs/zh/sql-reference/data-types/fixedstring.md @@ -18,8 +18,8 @@ slug: /zh/sql-reference/data-types/fixedstring 可以有效存储在`FixedString`类型的列中的值的示例: - 二进制表示的IP地址(IPv6使用`FixedString(16)`) -- 语言代码(ru_RU, en_US … ) -- 货币代码(USD, RUB … ) +- 语言代码(ru_RU, en_US ... ) +- 货币代码(USD, RUB ... ) - 二进制表示的哈希值(MD5使用`FixedString(16)`,SHA256使用`FixedString(32)`) 请使用[UUID](uuid.md)数据类型来存储UUID值,。 diff --git a/docs/zh/sql-reference/data-types/nested-data-structures/nested.md b/docs/zh/sql-reference/data-types/nested-data-structures/nested.md index 5ef8256b483..57b30de0881 100644 --- a/docs/zh/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/zh/sql-reference/data-types/nested-data-structures/nested.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/nested-data-structures/nested --- -# Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +# Nested(Name1 Type1, Name2 Type2, ...) 
{#nestedname1-type1-name2-type2} 嵌套数据结构类似于嵌套表。嵌套数据结构的参数(列名和类型)与 CREATE 查询类似。每个表可以包含任意多行嵌套数据结构。 diff --git a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md index 601cb602a78..fbaa76365ec 100644 --- a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md @@ -3,7 +3,7 @@ slug: /zh/sql-reference/data-types/simpleaggregatefunction --- # SimpleAggregateFunction {#data-type-simpleaggregatefunction} -`SimpleAggregateFunction(name, types_of_arguments…)` 数据类型存储聚合函数的当前值, 并不像 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 那样存储其全部状态。这种优化可以应用于具有以下属性函数: 将函数 `f` 应用于行集合 `S1 UNION ALL S2` 的结果,可以通过将 `f` 分别应用于行集合的部分, 然后再将 `f` 应用于结果来获得: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`。 这个属性保证了部分聚合结果足以计算出合并的结果,所以我们不必存储和处理任何额外的数据。 +`SimpleAggregateFunction(name, types_of_arguments...)` 数据类型存储聚合函数的当前值, 并不像 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 那样存储其全部状态。这种优化可以应用于具有以下属性函数: 将函数 `f` 应用于行集合 `S1 UNION ALL S2` 的结果,可以通过将 `f` 分别应用于行集合的部分, 然后再将 `f` 应用于结果来获得: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`。 这个属性保证了部分聚合结果足以计算出合并的结果,所以我们不必存储和处理任何额外的数据。 支持以下聚合函数: diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index 004c80ff916..38813701c70 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/tuple --- -# Tuple(T1, T2, …) {#tuplet1-t2} +# Tuple(T1, T2, ...) {#tuplet1-t2} 元组,其中每个元素都有单独的 [类型](index.md#data_types)。 diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index d150b94b8af..69db34e4a36 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -152,7 +152,7 @@ SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); └─────────────┴─────────────┴────────────────┴─────────────────┘ ``` -## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1} +## array(x1, ...), operator \[x1, ...\] {#arrayx1-operator-x1} 使用函数的参数作为数组元素创建一个数组。 参数必须是常量,并且具有最小公共类型的类型。必须至少传递一个参数,否则将不清楚要创建哪种类型的数组。也就是说,你不能使用这个函数来创建一个空数组(为此,使用上面描述的’emptyArray  \*’函数)。 @@ -337,7 +337,7 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) 设置为«NULL»的元素将作为普通的元素值处理。 -## arrayCount(\[func,\] arr1, …) {#array-count} +## arrayCount(\[func,\] arr1, ...) {#array-count} `func`将arr数组作为参数,其返回结果为非零值的数量。如果未指定“func”,则返回数组中非零元素的数量。 @@ -363,7 +363,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) {#array_functions-arrayenumerate} -返回 Array \[1, 2, 3, …, length (arr) \] +返回 Array \[1, 2, 3, ..., length (arr) \] 此功能通常与ARRAY JOIN一起使用。它允许在应用ARRAY JOIN后为每个数组计算一次。例如: @@ -403,7 +403,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) 此功能也可用于高阶函数。例如,您可以使用它来获取与条件匹配的元素的数组索引。 -## arrayEnumerateUniq(arr, …) {#arrayenumerateuniqarr} +## arrayEnumerateUniq(arr, ...) {#arrayenumerateuniqarr} 返回与源数组大小相同的数组,其中每个元素表示与其下标对应的源数组元素在源数组中出现的次数。 例如:arrayEnumerateUniq( \[10,20,10,30 \])=  \[1,1,2,1 \]。 @@ -621,7 +621,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res 设置为«NULL»的数组元素作为普通的数组元素值处理。 -## arraySort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arraySort(\[func,\] arr, ...) 
{#array_functions-reverse-sort} 以升序对`arr`数组的元素进行排序。如果指定了`func`函数,则排序顺序由`func`函数的调用结果决定。如果`func`接受多个参数,那么`arraySort`函数也将解析与`func`函数参数相同数量的数组参数。更详细的示例在`arraySort`的末尾。 @@ -721,7 +721,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; !!! 注意 "注意" 为了提高排序效率, 使用了[施瓦茨变换](https://en.wikipedia.org/wiki/Schwartzian_transform)。 -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#array_functions-reverse-sort} 以降序对`arr`数组的元素进行排序。如果指定了`func`函数,则排序顺序由`func`函数的调用结果决定。如果`func`接受多个参数,那么`arrayReverseSort`函数也将解析与`func`函数参数相同数量的数组作为参数。更详细的示例在`arrayReverseSort`的末尾。 @@ -822,7 +822,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayUniq(arr, …) {#arrayuniqarr} +## arrayUniq(arr, ...) {#arrayuniqarr} 如果传递一个参数,则计算数组中不同元素的数量。 如果传递了多个参数,则它计算多个数组中相应位置的不同元素元组的数量。 @@ -1221,7 +1221,7 @@ select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); └───────────────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) {#array-map} +## arrayMap(func, arr1, ...) {#array-map} 将从 `func` 函数的原始应用中获得的数组返回给 `arr` 数组中的每个元素。 @@ -1251,7 +1251,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res 请注意,`arrayMap` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFilter(func, arr1, …) {#array-filter} +## arrayFilter(func, arr1, ...) {#array-filter} 返回一个仅包含 `arr1` 中的元素的数组,其中 `func` 返回的值不是 0。 @@ -1284,7 +1284,7 @@ SELECT 请注意,`arrayFilter`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFill(func, arr1, …) {#array-fill} +## arrayFill(func, arr1, ...) {#array-fill} 从第一个元素到最后一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i - 1]`替换`arr1[i]`。`arr1`的第一个元素不会被替换。 @@ -1302,7 +1302,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, 请注意,`arrayFill` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayReverseFill(func, arr1, …) {#array-reverse-fill} +## arrayReverseFill(func, arr1, ...) {#array-reverse-fill} 从最后一个元素到第一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i + 1]`替换`arr1[i]`。`arr1`的最后一个元素不会被替换。 @@ -1320,7 +1320,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 请注意,`arrayReverseFill`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arraySplit(func, arr1, …) {#array-split} +## arraySplit(func, arr1, ...) {#array-split} 将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的左侧拆分。数组不会在第一个元素之前被拆分。 @@ -1338,7 +1338,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res 请注意,`arraySplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayReverseSplit(func, arr1, …) {#array-reverse-split} +## arrayReverseSplit(func, arr1, ...) {#array-reverse-split} 将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的右侧拆分。数组不会在最后一个元素之后被拆分。 @@ -1356,37 +1356,37 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res 请注意,`arrayReverseSplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +## arrayExists(\[func,\] arr1, ...) 
{#arrayexistsfunc-arr1} 如果 `arr` 中至少有一个元素 `func` 返回 0 以外的值,则返回 1。否则,它返回 0。 请注意,`arrayExists`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +## arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} 如果 `func` 为 `arr` 中的所有元素返回 0 以外的值,则返回 1。否则,它返回 0。 请注意,`arrayAll`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFirst(func, arr1, …) {#array-first} +## arrayFirst(func, arr1, ...) {#array-first} 返回 `arr1` 数组中 `func` 返回非 0 的值的第一个元素。 请注意,`arrayFirst`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayLast(func, arr1, …) {#array-last} +## arrayLast(func, arr1, ...) {#array-last} 返回 `arr1` 数组中的最后一个元素,其中 `func` 返回的值不是 0。 请注意,`arrayLast`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFirstIndex(func, arr1, …) {#array-first-index} +## arrayFirstIndex(func, arr1, ...) {#array-first-index} 返回 `arr1` 数组中第一个元素的索引,其中 `func` 返回的值不是 0。 请注意,`arrayFirstIndex`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayLastIndex(func, arr1, …) {#array-last-index} +## arrayLastIndex(func, arr1, ...) {#array-last-index} 返回 `arr1` 数组中最后一个元素的索引,其中 `func` 返回的值不是 0。 @@ -1612,7 +1612,7 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +## arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} 返回源数组中元素的部分和的数组(运行总和)。如果指定了 func 函数,则数组元素的值在求和之前由该函数转换。 diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index d6493ffe605..18b9f3495c0 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -443,7 +443,7 @@ SELECT toStartOfSecond(dt64, 'Asia/Istanbul'); `toISOWeek()`是一个兼容函数,等效于`toWeek(date,3)`。 下表描述了mode参数的工作方式。 -| Mode | First day of week | Range | Week 1 is the first week … | +| Mode | First day of week | Range | Week 1 is the first week ... | |------|-------------------|-------|-------------------------------| | 0 | Sunday | 0-53 | with a Sunday in this year | | 1 | Monday | 0-53 | with 4 or more days this year | diff --git a/docs/zh/sql-reference/functions/higher-order-functions.md b/docs/zh/sql-reference/functions/higher-order-functions.md index 929dc6f3ea7..0e08f88bba1 100644 --- a/docs/zh/sql-reference/functions/higher-order-functions.md +++ b/docs/zh/sql-reference/functions/higher-order-functions.md @@ -15,13 +15,13 @@ slug: /zh/sql-reference/functions/higher-order-functions 除了’arrayMap’和’arrayFilter’以外的所有其他函数,都可以省略第一个参数(lambda函数)。在这种情况下,默认返回数组元素本身。 -### arrayMap(func, arr1, …) {#higher_order_functions-array-map} +### arrayMap(func, arr1, ...) {#higher_order_functions-array-map} 将arr 将从’func’函数的原始应用程序获得的数组返回到’arr’数组中的每个元素。 返回从原始应用程序获得的数组 ‘func’ 函数中的每个元素 ‘arr’ 阵列。 -### arrayFilter(func, arr1, …) {#arrayfilterfunc-arr1} +### arrayFilter(func, arr1, ...) {#arrayfilterfunc-arr1} 返回一个仅包含以下元素的数组 ‘arr1’ 对于哪个 ‘func’ 返回0以外的内容。 @@ -48,31 +48,31 @@ SELECT │ [2] │ └─────┘ -### arrayCount(\[func,\] arr1, …) {#arraycountfunc-arr1} +### arrayCount(\[func,\] arr1, ...) {#arraycountfunc-arr1} 返回数组arr中非零元素的数量,如果指定了’func’,则通过’func’的返回值确定元素是否为非零元素。 -### arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +### arrayExists(\[func,\] arr1, ...) 
{#arrayexistsfunc-arr1} 返回数组’arr’中是否存在非零元素,如果指定了’func’,则使用’func’的返回值确定元素是否为非零元素。 -### arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +### arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} 返回数组’arr’中是否存在为零的元素,如果指定了’func’,则使用’func’的返回值确定元素是否为零元素。 -### arraySum(\[func,\] arr1, …) {#arraysumfunc-arr1} +### arraySum(\[func,\] arr1, ...) {#arraysumfunc-arr1} 计算arr数组的总和,如果指定了’func’,则通过’func’的返回值计算数组的总和。 -### arrayFirst(func, arr1, …) {#arrayfirstfunc-arr1} +### arrayFirst(func, arr1, ...) {#arrayfirstfunc-arr1} 返回数组中第一个匹配的元素,函数使用’func’匹配所有元素,直到找到第一个匹配的元素。 -### arrayFirstIndex(func, arr1, …) {#arrayfirstindexfunc-arr1} +### arrayFirstIndex(func, arr1, ...) {#arrayfirstindexfunc-arr1} 返回数组中第一个匹配的元素的下标索引,函数使用’func’匹配所有元素,直到找到第一个匹配的元素。 -### arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +### arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} 返回源数组部分数据的总和,如果指定了`func`函数,则使用`func`的返回值计算总和。 @@ -98,7 +98,7 @@ SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res │ [1,2,0,1] │ └───────────┘ -### arraySort(\[func,\] arr1, …) {#arraysortfunc-arr1} +### arraySort(\[func,\] arr1, ...) {#arraysortfunc-arr1} 返回升序排序`arr1`的结果。如果指定了`func`函数,则排序顺序由`func`的结果决定。 @@ -124,7 +124,7 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL]) │ [1,2,3,4,nan,nan,NULL,NULL] │ └───────────────────────────────────────────────┘ -### arrayReverseSort(\[func,\] arr1, …) {#arrayreversesortfunc-arr1} +### arrayReverseSort(\[func,\] arr1, ...) {#arrayreversesortfunc-arr1} 返回降序排序`arr1`的结果。如果指定了`func`函数,则排序顺序由`func`的结果决定。 diff --git a/docs/zh/sql-reference/functions/in-functions.md b/docs/zh/sql-reference/functions/in-functions.md index 346e076310e..9858159a495 100644 --- a/docs/zh/sql-reference/functions/in-functions.md +++ b/docs/zh/sql-reference/functions/in-functions.md @@ -10,10 +10,10 @@ sidebar_label: IN 运算符 请参阅[IN 运算符](../../sql-reference/operators/in.md#select-in-operators)部分。 -## tuple(x, y, …), 运算符 (x, y, …) {#tuplex-y-operator-x-y} +## tuple(x, y, ...), 运算符 (x, y, ...) {#tuplex-y-operator-x-y} 函数用于对多个列进行分组。 -对于具有类型T1,T2,…的列,它返回包含这些列的元组(T1,T2,…)。 执行该函数没有任何成本。 +对于具有类型T1,T2,...的列,它返回包含这些列的元组(T1,T2,...)。 执行该函数没有任何成本。 元组通常用作IN运算符的中间参数值,或用于创建lambda函数的形参列表。 元组不能写入表。 ## tupleElement(tuple, n), 运算符 x.N {#tupleelementtuple-n-operator-x-n} diff --git a/docs/zh/sql-reference/functions/json-functions.md b/docs/zh/sql-reference/functions/json-functions.md index 52ec0ed1535..f07de564847 100644 --- a/docs/zh/sql-reference/functions/json-functions.md +++ b/docs/zh/sql-reference/functions/json-functions.md @@ -56,7 +56,7 @@ slug: /zh/sql-reference/functions/json-functions 以下函数基于[simdjson](https://github.com/lemire/simdjson),专为更复杂的JSON解析要求而设计。但上述假设2仍然适用。 -## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys} +## JSONHas(json\[, indices_or_keys\]...) {#jsonhasjson-indices-or-keys} 如果JSON中存在该值,则返回`1`。 @@ -83,7 +83,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' select JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' -## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys} +## JSONLength(json\[, indices_or_keys\]...) {#jsonlengthjson-indices-or-keys} 返回JSON数组或JSON对象的长度。 @@ -94,7 +94,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 -## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys} +## JSONType(json\[, indices_or_keys\]...) 
{#jsontypejson-indices-or-keys} 返回JSON值的类型。 @@ -106,13 +106,13 @@ slug: /zh/sql-reference/functions/json-functions select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' -## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys} +## JSONExtractUInt(json\[, indices_or_keys\]...) {#jsonextractuintjson-indices-or-keys} -## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys} +## JSONExtractInt(json\[, indices_or_keys\]...) {#jsonextractintjson-indices-or-keys} -## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys} +## JSONExtractFloat(json\[, indices_or_keys\]...) {#jsonextractfloatjson-indices-or-keys} -## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys} +## JSONExtractBool(json\[, indices_or_keys\]...) {#jsonextractbooljson-indices-or-keys} 解析JSON并提取值。这些函数类似于`visitParam*`函数。 @@ -124,7 +124,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200.0 select JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 -## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys} +## JSONExtractString(json\[, indices_or_keys\]...) {#jsonextractstringjson-indices-or-keys} 解析JSON并提取字符串。此函数类似于`visitParamExtractString`函数。 @@ -140,11 +140,11 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractString('{"abc":"\\u263"}', 'abc') = '' select JSONExtractString('{"abc":"hello}', 'abc') = '' -## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type} +## JSONExtract(json\[, indices_or_keys...\], Return_type) {#jsonextractjson-indices-or-keys-return-type} 解析JSON并提取给定ClickHouse数据类型的值。 -这是以前的`JSONExtract函数的变体。 这意味着`JSONExtract(…, ‘String’)`返回与`JSONExtractString()`返回完全相同。`JSONExtract(…, ‘Float64’)`返回于`JSONExtractFloat()\`返回完全相同。 +这是以前的`JSONExtract函数的变体。 这意味着`JSONExtract(..., ‘String’)`返回与`JSONExtractString()`返回完全相同。`JSONExtract(..., ‘Float64’)`返回于`JSONExtractFloat()\`返回完全相同。 示例: @@ -156,7 +156,7 @@ slug: /zh/sql-reference/functions/json-functions SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Thursday' SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} 从JSON中解析键值对,其中值是给定的ClickHouse数据类型。 @@ -164,7 +164,7 @@ slug: /zh/sql-reference/functions/json-functions SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8') = [('a',5),('b',7),('c',11)]; -## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys} +## JSONExtractRaw(json\[, indices_or_keys\]...) 
{#jsonextractrawjson-indices-or-keys} 返回JSON的部分。 diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index 2eeaad63694..9c28ff867c5 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -90,7 +90,7 @@ SELECT 'some-file-name' AS a, basename(a) 将一个常量列变为一个非常量列。 在ClickHouse中,非常量列和常量列在内存中的表示方式不同。尽管函数对于常量列和非常量总是返回相同的结果,但它们的工作方式可能完全不同(执行不同的代码)。此函数用于调试这种行为。 -## ignore(…) {#ignore} +## ignore(...) {#ignore} 接受任何参数,包括`NULL`。始终返回0。 但是,函数的参数总是被计算的。该函数可以用于基准测试。 diff --git a/docs/zh/sql-reference/functions/string-functions.md b/docs/zh/sql-reference/functions/string-functions.md index d1914839d7c..c28735c7dc7 100644 --- a/docs/zh/sql-reference/functions/string-functions.md +++ b/docs/zh/sql-reference/functions/string-functions.md @@ -95,7 +95,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') 以Unicode字符为单位反转UTF-8编码的字符串。如果字符串不是UTF-8编码,则可能获取到一个非预期的结果(不会抛出异常)。 -## format(pattern, s0, s1, …) {#formatpattern-s0-s1} +## format(pattern, s0, s1, ...) {#formatpattern-s0-s1} 使用常量字符串`pattern`格式化其他参数。`pattern`字符串中包含由大括号`{}`包围的«替换字段»。 未被包含在大括号中的任何内容都被视为文本内容,它将原样保留在返回值中。 如果你需要在文本内容中包含一个大括号字符,它可以通过加倍来转义:`{{ '{{' }}`和`{{ '{{' }} '}}' }}`。 字段名称可以是数字(从零开始)或空(然后将它们视为连续数字) @@ -113,11 +113,11 @@ SELECT format('{} {}', 'Hello', 'World') └───────────────────────────────────┘ ``` -## concat(s1, s2, …) {#concat-s1-s2} +## concat(s1, s2, ...) {#concat-s1-s2} 将参数中的多个字符串拼接,不带分隔符。 -## concatAssumeInjective(s1, s2, …) {#concatassumeinjectives1-s2} +## concatAssumeInjective(s1, s2, ...) {#concatassumeinjectives1-s2} 与[concat](#concat-s1-s2)相同,区别在于,你需要保证concat(s1, s2, s3) -\> s4是单射的,它将用于GROUP BY的优化。 diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md index 972fd84e2a1..8ada76eeeda 100644 --- a/docs/zh/sql-reference/functions/string-search-functions.md +++ b/docs/zh/sql-reference/functions/string-search-functions.md @@ -204,7 +204,7 @@ SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']); **语法** ```sql -multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) +multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) ``` ## multiSearchFirstIndex @@ -216,7 +216,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) **语法** ```sql -multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) +multiSearchFirstIndex(haystack, \[needle1, needle2, ..., needlen\]) ``` ## multiSearchAny {#multisearchany} @@ -229,7 +229,7 @@ multiSearchFirstIndex(haystack, \[needle1, needle2, …, n **语法** ```sql -multiSearchAny(haystack, [needle1, needle2, …, needleN]) +multiSearchAny(haystack, [needle1, needle2, ..., needleN]) ``` ## match {#match} @@ -273,7 +273,7 @@ Hyperscan 通常容易受到正则表达式拒绝服务 (ReDoS) 攻击。有关 **语法** ```sql -multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAnyIndex @@ -283,7 +283,7 @@ multiMatchAny(haystack, \[pattern1, pattern2, …, pattern **语法** ```sql -multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAllIndices @@ -293,7 +293,7 @@ multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, pa **语法** ```sql -multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAny @@ -307,7 +307,7 
+307,7 @@ multiMatchAllIndices(haystack, \[pattern1, pattern2, …,

**语法**

```sql
-multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\])
+multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\])
```

## multiFuzzyMatchAnyIndex

@@ -317,7 +317,7 @@ multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2

**语法**

```sql
-multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\])
+multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\])
```

## multiFuzzyMatchAllIndices

@@ -327,7 +327,7 @@ multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2

**语法**

```sql
-multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\])
+multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\])
```

## extract

diff --git a/docs/zh/sql-reference/functions/url-functions.md b/docs/zh/sql-reference/functions/url-functions.md
index 44880b6ca1a..e7a0354c0bf 100644
--- a/docs/zh/sql-reference/functions/url-functions.md
+++ b/docs/zh/sql-reference/functions/url-functions.md
@@ -11,7 +11,7 @@ slug: /zh/sql-reference/functions/url-functions

### 协议 {#protocol}

-返回URL的协议。例如: http、ftp、mailto、magnet…
+返回URL的协议。例如: http、ftp、mailto、magnet...

### 域 {#domain}

diff --git a/docs/zh/sql-reference/statements/alter/delete.md b/docs/zh/sql-reference/statements/alter/delete.md
index 5eb77c35a93..f0b41c4e214 100644
--- a/docs/zh/sql-reference/statements/alter/delete.md
+++ b/docs/zh/sql-reference/statements/alter/delete.md
@@ -4,7 +4,7 @@ sidebar_position: 39
sidebar_label: DELETE
---

-# ALTER TABLE … DELETE 语句 {#alter-mutations}
+# ALTER TABLE ... DELETE 语句 {#alter-mutations}

``` sql
ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr
diff --git a/docs/zh/sql-reference/statements/alter/index.md b/docs/zh/sql-reference/statements/alter/index.md
index e173837a16c..2286dcccd13 100644
--- a/docs/zh/sql-reference/statements/alter/index.md
+++ b/docs/zh/sql-reference/statements/alter/index.md
@@ -38,7 +38,7 @@ sidebar_label: ALTER

## Mutations 突变 {#mutations}

-用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。
+用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE ... DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE ... UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。

diff --git a/docs/zh/sql-reference/statements/alter/update.md b/docs/zh/sql-reference/statements/alter/update.md
index 97b2b43d889..7cf37401dc5 100644
--- a/docs/zh/sql-reference/statements/alter/update.md
+++ b/docs/zh/sql-reference/statements/alter/update.md
@@ -4,7 +4,7 @@ sidebar_position: 40
sidebar_label: UPDATE
---

-# ALTER TABLE … UPDATE 语句 {#alter-table-update-statements}
+# ALTER TABLE ... UPDATE 语句 {#alter-table-update-statements}

``` sql
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
diff --git a/docs/zh/sql-reference/statements/alter/view.md b/docs/zh/sql-reference/statements/alter/view.md
index 34a612803c1..a19d918612a 100644
--- a/docs/zh/sql-reference/statements/alter/view.md
+++ b/docs/zh/sql-reference/statements/alter/view.md
@@ -4,9 +4,9 @@ sidebar_position: 50
sidebar_label: VIEW
---

-# ALTER TABLE … MODIFY QUERY 语句 {#alter-modify-query}
+# ALTER TABLE ... 
MODIFY QUERY 语句 {#alter-modify-query} -当使用`ALTER TABLE … MODIFY QUERY`语句创建一个[物化视图](../create/view.md#materialized)时,可以修改`SELECT`查询。当物化视图在没有 `TO [db.]name` 的情况下创建时使用它。必须启用 `allow_experimental_alter_materialized_view_structure`设置。 +当使用`ALTER TABLE ... MODIFY QUERY`语句创建一个[物化视图](../create/view.md#materialized)时,可以修改`SELECT`查询。当物化视图在没有 `TO [db.]name` 的情况下创建时使用它。必须启用 `allow_experimental_alter_materialized_view_structure`设置。 如果一个物化视图使用`TO [db.]name`,你必须先 [DETACH](../detach.mdx) 视图。用[ALTER TABLE](index.md)修改目标表,然后 [ATTACH](../attach.mdx)之前分离的(`DETACH`)视图。 diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index bce0994ecd2..49a1d66bdf1 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -55,7 +55,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 如果指定`POPULATE`,则在创建视图时将现有表数据插入到视图中,就像创建一个`CREATE TABLE ... AS SELECT ...`一样。 否则,查询仅包含创建视图后插入表中的数据。 我们**不建议**使用POPULATE,因为在创建视图期间插入表中的数据不会插入其中。 -`SELECT` 查询可以包含`DISTINCT`、`GROUP BY`、`ORDER BY`、`LIMIT`……请注意,相应的转换是在每个插入数据块上独立执行的。 例如,如果设置了`GROUP BY`,则在插入期间聚合数据,但仅在插入数据的单个数据包内。 数据不会被进一步聚合。 例外情况是使用独立执行数据聚合的`ENGINE`,例如`SummingMergeTree`。 +`SELECT` 查询可以包含`DISTINCT`、`GROUP BY`、`ORDER BY`、`LIMIT`...请注意,相应的转换是在每个插入数据块上独立执行的。 例如,如果设置了`GROUP BY`,则在插入期间聚合数据,但仅在插入数据的单个数据包内。 数据不会被进一步聚合。 例外情况是使用独立执行数据聚合的`ENGINE`,例如`SummingMergeTree`。 在物化视图上执行[ALTER](../../../sql-reference/statements/alter/index.md)查询有局限性,因此可能不方便。 如果物化视图使用构造`TO [db.]name`,你可以`DETACH`视图,为目标表运行`ALTER`,然后`ATTACH`先前分离的(`DETACH`)视图。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index f80c0a8a8ea..a08a78b6f1d 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -68,7 +68,7 @@ SELECT * FROM insert_select_testtable; INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同: +例如,下面的查询所使用的输入格式就与上面INSERT ... VALUES的中使用的输入格式相同: ``` sql INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... diff --git a/docs/zh/sql-reference/statements/select/limit.md b/docs/zh/sql-reference/statements/select/limit.md index 2bbf2949707..795f3f4ecd1 100644 --- a/docs/zh/sql-reference/statements/select/limit.md +++ b/docs/zh/sql-reference/statements/select/limit.md @@ -13,11 +13,11 @@ sidebar_label: LIMIT 如果没有 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句显式排序结果,结果的行选择可能是任意的和非确定性的。 -## LIMIT … WITH TIES 修饰符 {#limit-with-ties} +## LIMIT ... WITH TIES 修饰符 {#limit-with-ties} 如果为 `LIMIT n[,m]` 设置了 `WITH TIES` ,并且声明了 `ORDER BY expr_list`, 除了得到无修饰符的结果(正常情况下的 `limit n`, 前n行数据), 还会返回与第`n`行具有相同排序字段的行(即如果第n+1行的字段与第n行 拥有相同的排序字段,同样返回该结果. -此修饰符可以与: [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用. +此修饰符可以与: [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用. 例如以下查询: diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md index 3286fc9f9e7..2f2d9a4959c 100644 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -89,7 +89,7 @@ SELECT a, b, c FROM t ORDER BY a, b, c ## ORDER BY Expr WITH FILL Modifier {#orderby-with-fill} -此修饰符可以与 [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用. 
+此修饰符可以与 [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用. 可以在`ORDER BY expr`之后用可选的`FROM expr`,`TO expr`和`STEP expr`参数来设置`WITH FILL`修饰符。 所有`expr`列的缺失值将被顺序填充,而其他列将被填充为默认值。 diff --git a/docs/zh/sql-reference/table-functions/file.md b/docs/zh/sql-reference/table-functions/file.md index 28682255738..fa1ec12f7df 100644 --- a/docs/zh/sql-reference/table-functions/file.md +++ b/docs/zh/sql-reference/table-functions/file.md @@ -114,7 +114,7 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32') **示例** -从名为 `file000`, `file001`, … , `file999`的文件中查询数据: +从名为 `file000`, `file001`, ... , `file999`的文件中查询数据: ``` sql SELECT count(*) diff --git a/docs/zh/sql-reference/table-functions/hdfs.md b/docs/zh/sql-reference/table-functions/hdfs.md index b10b10ae2d2..f8320d8d0bb 100644 --- a/docs/zh/sql-reference/table-functions/hdfs.md +++ b/docs/zh/sql-reference/table-functions/hdfs.md @@ -84,7 +84,7 @@ FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value U **示例** -从名为 `file000`, `file001`, … , `file999`的文件中查询数据: +从名为 `file000`, `file001`, ... , `file999`的文件中查询数据: ``` sql SELECT count(*) diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index f7384a7526e..4f2c7299d95 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -99,7 +99,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi !!! warning "Warning" 如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 -计算名为 `file-000.csv`, `file-001.csv`, … , `file-999.csv` 文件的总行数: +计算名为 `file-000.csv`, `file-001.csv`, ... , `file-999.csv` 文件的总行数: ``` sql SELECT count(*) From 713764f62fa92db1fab04dcb426682b4859d6de1 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 14:01:00 +0200 Subject: [PATCH 547/651] Add missing space before link --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 4501d1f43d3..829d46df9fa 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -37,7 +37,7 @@ getMacro(name); **Returned value** -- Value of the specified macro.[String](../../sql-reference/data-types/string.md). +- Value of the specified macro. [String](../../sql-reference/data-types/string.md). 
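For instance, the macro lookup above can be sanity-checked with a query along these lines; the macro name `test` and its value are assumptions about the server's `<macros>` configuration, not something defined here:

```sql
-- assumes config.xml contains: <macros><test>Value</test></macros>
SELECT getMacro('test') AS macro_value, toTypeName(macro_value) AS type;
-- macro_value = 'Value', type = 'String'
```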
**Example** From dd7f3d1ba23bf2e18545ece2675f9836d84d7f69 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 14:11:30 +0200 Subject: [PATCH 548/651] Fix test --- tests/integration/test_storage_s3/test.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index dc929b7db46..09b27fff1e8 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1816,27 +1816,13 @@ def test_schema_inference_cache(started_cluster): check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses( - instance, - files, - storage_name, - started_cluster, - bucket, - 4 if storage_name == "url" else 1, - ) + check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) instance.query("system drop schema cache") check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses( - instance, - files, - storage_name, - started_cluster, - bucket, - 4 if storage_name == "url" else 1, - ) + check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) instance.query("system drop schema cache") From 147516f1626f656da5fc4dcc0d9254202a8de860 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 16 Apr 2024 13:05:07 +0000 Subject: [PATCH 549/651] Fix AST fuzzer failure --- src/Functions/FunctionHelpers.cpp | 2 ++ src/Functions/splitByRegexp.cpp | 10 ++++------ .../0_stateless/01866_split_by_regexp.reference | 1 + tests/queries/0_stateless/01866_split_by_regexp.sql | 3 +++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index d85bb0e7060..3b057779ffe 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -21,6 +21,8 @@ namespace ErrorCodes const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column) { + if (!column) + return {}; if (!isColumnConst(*column)) return {}; diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index e28fe9c38bb..042db97794d 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -164,6 +164,7 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return SplitByRegexpImpl::getNumberOfArguments(); } bool isVariadic() const override { return SplitByRegexpImpl::isVariadic(); } + /// ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return SplitByRegexpImpl::getArgumentsThatAreAlwaysConstant(); } FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { @@ -182,14 +183,11 @@ public: private: bool patternIsTrivialChar(const ColumnsWithTypeAndName & arguments) const { + if (!arguments[0].column.get()) + return false; const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get()); if (!col) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of function {}. 
" - "Must be constant string.", - arguments[0].column->getName(), - getName()); + return false; String pattern = col->getValue(); if (pattern.size() == 1) diff --git a/tests/queries/0_stateless/01866_split_by_regexp.reference b/tests/queries/0_stateless/01866_split_by_regexp.reference index 62939940545..552d4d1f96a 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.reference +++ b/tests/queries/0_stateless/01866_split_by_regexp.reference @@ -17,3 +17,4 @@ Test fallback of splitByRegexp to splitByChar if regexp is trivial ['a','b','c'] ['a|b|c'] ['a\\b\\c'] +AST Fuzzer failure diff --git a/tests/queries/0_stateless/01866_split_by_regexp.sql b/tests/queries/0_stateless/01866_split_by_regexp.sql index 570bd1ba5c0..bc25d3e1093 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.sql +++ b/tests/queries/0_stateless/01866_split_by_regexp.sql @@ -20,3 +20,6 @@ select splitByRegexp('{', 'a{b{c'); select splitByRegexp('}', 'a}b}c'); select splitByRegexp('|', 'a|b|c'); select splitByRegexp('\\', 'a\\b\\c'); + +SELECT 'AST Fuzzer failure'; +SELECT splitByRegexp(materialize(1), NULL, 3) -- { serverError ILLEGAL_COLUMN } From b1fe9ab5f0aa24408321382e9651517f7808a478 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 23 May 2024 15:33:21 +0200 Subject: [PATCH 550/651] CI: dependency fix for changelog.py #do_not_test --- tests/ci/ci.py | 3 ++- tests/ci/github_helper.py | 10 +++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index be922a306e1..99555b06bbf 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -45,6 +45,7 @@ from env_helper import ( S3_BUILDS_BUCKET, TEMP_PATH, GITHUB_RUN_ID, + GITHUB_REPOSITORY, ) from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, Git @@ -1913,7 +1914,7 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int) -> None: print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") else: print(f"Canceling PR workflow run_id: [{run_id}], pr: [{pr_number}]") - GitHub.cancel_wf(run_id) + GitHub.cancel_wf(GITHUB_REPOSITORY, get_best_robot_token(), run_id) def main() -> int: diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index 81603c66bae..eb0f6c24527 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -22,9 +22,6 @@ from github.NamedUser import NamedUser as NamedUser from github.PullRequest import PullRequest as PullRequest from github.Repository import Repository as Repository -from env_helper import GITHUB_REPOSITORY -from get_robot_token import get_best_robot_token - # pylint: enable=useless-import-alias CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache") @@ -265,12 +262,11 @@ class GitHub(github.Github): assert isinstance(value, int) self._retries = value - # minimalistic static methods not using pygithub + # static methods not using pygithub @staticmethod - def cancel_wf(run_id, strict=False): - token = get_best_robot_token() + def cancel_wf(repo, run_id, token, strict=False): headers = {"Authorization": f"token {token}"} - url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/actions/runs/{run_id}/cancel" + url = f"https://api.github.com/repos/{repo}/actions/runs/{run_id}/cancel" try: response = requests.post(url, headers=headers, timeout=10) response.raise_for_status() From 6e3a609907192d7cc378fb209d0e2431b8859eb0 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 15:43:17 +0200 Subject: [PATCH 551/651] Fix formatting in ru/index.md --- docs/ru/index.md | 
20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/ru/index.md b/docs/ru/index.md index d551d492af5..02be8912b94 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -12,10 +12,10 @@ ClickHouse — столбцовая система управления база | Строка | WatchID | JavaEnable | Title | GoodEvent | EventTime | |--------|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | ... | ... | ... | ... | ... | +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | ... | ... | ... | ... | ... | То есть, значения, относящиеся к одной строке, физически хранятся рядом. @@ -24,13 +24,13 @@ ClickHouse — столбцовая система управления база В столбцовых СУБД данные хранятся в таком порядке: -| Строка: | #0 | #1 | #2 | #N | +| Строка: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | -| JavaEnable: | 1 | 0 | 1 | ... | -| Title: | Investor Relations | Contact us | Mission | ... | -| GoodEvent: | 1 | 1 | 1 | ... | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | В примерах изображён только порядок расположения данных. То есть значения из разных столбцов хранятся отдельно, а данные одного столбца — вместе. From e24253c097ed2f0325c9be77fc87ebbe8f086a5c Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 15:45:26 +0200 Subject: [PATCH 552/651] Fix formatting in zh/index.md --- docs/zh/index.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/zh/index.md b/docs/zh/index.md index ec4b6dce1f8..c092f296722 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -13,10 +13,10 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) | Row | WatchID | JavaEnable | Title | GoodEvent | EventTime | |-----|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | ... | ... | ... | ... | ... | +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | ... | ... | ... | ... | ... | 处于同一行中的数据总是被物理的存储在一起。 @@ -24,13 +24,13 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在列式数据库系统中,数据按如下的顺序存储: -| Row: | #0 | #1 | #2 | #N | +| Row: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | -| JavaEnable: | 1 | 0 | 1 | ... | -| Title: | Investor Relations | Contact us | Mission | ... 
| -| GoodEvent: | 1 | 1 | 1 | ... | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | 这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。 From 87b4d43a3f93864c122f7fe2451c696720207809 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 15:48:20 +0200 Subject: [PATCH 553/651] Update return type formatting --- .../functions/arithmetic-functions.md | 8 +- .../functions/array-functions.md | 86 +++---- .../sql-reference/functions/bit-functions.md | 24 +- .../functions/bitmap-functions.md | 22 +- .../functions/date-time-functions.md | 222 +++++------------- .../functions/distance-functions.md | 58 ++--- .../functions/encoding-functions.md | 38 +-- .../functions/ext-dict-functions.md | 24 +- .../sql-reference/functions/hash-functions.md | 134 +++-------- .../sql-reference/functions/introspection.md | 29 +-- .../functions/ip-address-functions.md | 20 +- .../sql-reference/functions/json-functions.md | 22 +- .../sql-reference/functions/math-functions.md | 4 +- .../functions/other-functions.md | 140 ++++------- .../functions/random-functions.md | 56 ++--- .../functions/rounding-functions.md | 2 +- .../functions/splitting-merging-functions.md | 57 +++-- .../functions/string-functions.md | 100 ++------ .../functions/string-search-functions.md | 64 ++--- .../functions/time-series-functions.md | 14 +- .../functions/time-window-functions.md | 8 +- .../functions/tuple-functions.md | 36 +-- .../functions/tuple-map-functions.md | 16 +- .../functions/type-conversion-functions.md | 8 +- .../sql-reference/functions/ulid-functions.md | 4 +- .../sql-reference/functions/url-functions.md | 36 +-- .../sql-reference/functions/uuid-functions.md | 8 +- 27 files changed, 369 insertions(+), 871 deletions(-) diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 6d95f3dc358..aef4150ff50 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -320,9 +320,7 @@ multiplyDecimal(a, b[, result_scale]) **Returned value** -- The result of multiplication with given scale. - -Type: [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of multiplication with given scale. [Decimal256](../../sql-reference/data-types/decimal.md). **Example** @@ -396,9 +394,7 @@ divideDecimal(a, b[, result_scale]) **Returned value** -- The result of division with given scale. - -Type: [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of division with given scale. [Decimal256](../../sql-reference/data-types/decimal.md). **Example** diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 87e733a4b0c..512874d20b7 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -30,9 +30,7 @@ The function also works for [strings](string-functions.md#empty) or [UUID](uuid- **Returned value** -- Returns `1` for an empty array or `0` for a non-empty array. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty array or `0` for a non-empty array. [UInt8](../data-types/int-uint.md). 
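A minimal sketch of the behavior documented above, runnable on any recent ClickHouse:

```sql
SELECT
    empty([]) AS on_empty,        -- 1: the array has no elements
    empty([1, 2]) AS on_filled,   -- 0: the array is non-empty
    toTypeName(empty([])) AS type -- 'UInt8', matching the documented return type
```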
**Example** @@ -74,9 +72,7 @@ The function also works for [strings](string-functions.md#notempty) or [UUID](uu **Returned value** -- Returns `1` for a non-empty array or `0` for an empty array. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty array or `0` for an empty array. [UInt8](../data-types/int-uint.md). **Example** @@ -797,9 +793,11 @@ The sizes of the two vectors must be equal. Arrays and Tuples may also contain m **Returned value** -- The dot product of the two vectors. +- The dot product of the two vectors. [Numeric](https://clickhouse.com/docs/en/native-protocol/columns#numeric-types). -Type: numeric - determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. +:::note +The return type is determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. +::: **Examples** @@ -1186,9 +1184,7 @@ arrayShingles(array, length) **Returned value** -- An array of generated shingles. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array of generated shingles. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -1562,9 +1558,7 @@ arrayDifference(array) **Returned values** -Returns an array of differences between adjacent array elements. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +Returns an array of differences between adjacent array elements. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). **Example** @@ -1841,9 +1835,7 @@ arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) **Returned value** -- Array containing results of the aggregate function over specified ranges. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing results of the aggregate function over specified ranges. [Array](../../sql-reference/data-types/array.md). **Example** @@ -1986,9 +1978,7 @@ arrayCompact(arr) **Returned value** -The array without duplicate. - -Type: `Array`. +The array without duplicate. [Array](../data-types/array.md). **Example** @@ -2024,9 +2014,7 @@ The function can take any number of arrays of different types. All the input arr **Returned value** -- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../../sql-reference/data-types/array.md). **Example** @@ -2383,7 +2371,8 @@ arrayMin([func,] arr) - The minimum of function values (or the array minimum). -Type: if `func` is specified, matches `func` return value type, else matches the array elements type. +:::note +If `func` is specified, then the return type matches the return value type of `func`, otherwise it matches the type of the array elements. 
+:::
**Examples** @@ -2438,7 +2427,9 @@ arrayMax([func,] arr) - The maximum of function values (or the array maximum). -Type: if `func` is specified, matches `func` return value type, else matches the array elements type. +:::note +if `func` is specified then the return type matches the return value type of `func`, otherwise it matches the type of the array elements. +::: **Examples** @@ -2493,7 +2484,14 @@ arraySum([func,] arr) - The sum of the function values (or the array sum). -Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). +:::note +Return type: + +- For decimal numbers in the source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md). +- For floating point numbers — [Float64](../../sql-reference/data-types/float.md). +- For numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md). +- For numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). +::: **Examples** @@ -2546,9 +2544,7 @@ arrayAvg([func,] arr) **Returned value** -- The average of function values (or the array average). - -Type: [Float64](../../sql-reference/data-types/float.md). +- The average of function values (or the array average). [Float64](../../sql-reference/data-types/float.md). **Examples** @@ -2596,9 +2592,7 @@ arrayCumSum(arr) **Returned value** -- Returns an array of the partial sums of the elements in the source array. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +- Returns an array of the partial sums of the elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). Example: @@ -2630,9 +2624,7 @@ arrayCumSumNonNegative(arr) **Returned value** -- Returns an array of non-negative partial sums of elements in the source array. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +- Returns an array of non-negative partial sums of elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). ``` sql SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res @@ -2662,9 +2654,7 @@ arrayProduct(arr) **Returned value** -- A product of array's elements. - -Type: [Float64](../../sql-reference/data-types/float.md). +- A product of array's elements. [Float64](../../sql-reference/data-types/float.md). **Examples** @@ -2714,9 +2704,7 @@ arrayRotateLeft(arr, n) **Returned value** -- An array rotated to the left by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array rotated to the left by the specified number of elements. [Array](../../sql-reference/data-types/array.md). 
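As a quick illustration of the rotation and its documented `Array` return type (the values here are just sample data):

```sql
SELECT
    arrayRotateLeft([1, 2, 3, 4, 5], 2) AS rotated, -- [3, 4, 5, 1, 2]
    toTypeName(rotated) AS type                     -- 'Array(UInt8)' for these small literals
```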
**Examples** @@ -2780,9 +2768,7 @@ arrayRotateRight(arr, n) **Returned value** -- An array rotated to the right by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array rotated to the right by the specified number of elements. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -2848,9 +2834,7 @@ arrayShiftLeft(arr, n[, default]) **Returned value** -- An array shifted to the left by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array shifted to the left by the specified number of elements. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -2944,9 +2928,7 @@ arrayShiftRight(arr, n[, default]) **Returned value** -- An array shifted to the right by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array shifted to the right by the specified number of elements. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -3038,9 +3020,7 @@ arrayRandomSample(arr, samples) **Returned Value** -- An array containing a random sample of elements from the input array. - -Type: [Array](../data-types/array.md). +- An array containing a random sample of elements from the input array. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 0951c783aae..709f438d67f 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -188,9 +188,7 @@ SELECT bitTest(number, index) **Returned values** -Returns a value of bit at specified position. - -Type: `UInt8`. +Returns a value of bit at specified position. [UInt8](../data-types/int-uint.md). **Example** @@ -253,9 +251,7 @@ SELECT bitTestAll(number, index1, index2, index3, index4, ...) **Returned values** -Returns result of logical conjuction. - -Type: `UInt8`. +Returns result of logical conjuction. [UInt8](../data-types/int-uint.md). **Example** @@ -318,9 +314,7 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...) **Returned values** -Returns result of logical disjunction. - -Type: `UInt8`. +Returns result of logical disjunction. [UInt8](../data-types/int-uint.md). **Example** @@ -372,11 +366,11 @@ bitCount(x) **Returned value** -- Number of bits set to one in the input number. +- Number of bits set to one in the input number. [UInt8](../data-types/int-uint.md). -The function does not convert input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. - -Type: `UInt8`. +:::note +The function does not convert the input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. +::: **Example** @@ -413,9 +407,7 @@ bitHammingDistance(int1, int2) **Returned value** -- The Hamming distance. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- The Hamming distance. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index 379be302881..e546de039da 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -75,8 +75,8 @@ bitmapSubsetInRange(bitmap, range_start, range_end) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). 
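A small worked example of the bit indexing described above; `43` is `0b101011`, and positions count from the least significant bit:

```sql
SELECT
    bitTest(43, 1) AS bit_1,           -- 1 (second-lowest bit of 101011)
    bitTest(43, 2) AS bit_2,           -- 0
    toTypeName(bitTest(43, 1)) AS type -- 'UInt8', the documented return type
```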
-- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `range_end` – End of the range (exclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_end` – End of the range (exclusive). [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -105,8 +105,8 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – Maximum cardinality of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../../sql-reference/data-types/int-uint.md). +- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -134,9 +134,9 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** -- `bitmap` – The bitmap. Type: [Bitmap object](#bitmap_functions-bitmapbuild). -- `offset` – The position of the first element of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – The maximum number of elements in the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild). +- `offset` – The position of the first element of the subset. [UInt32](../../sql-reference/data-types/int-uint.md). +- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -163,14 +163,12 @@ bitmapContains(bitmap, needle) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `needle` – Searched bit value. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `needle` – Searched bit value. [UInt32](../../sql-reference/data-types/int-uint.md). **Returned values** -- 0 — If `bitmap` does not contain `needle`. -- 1 — If `bitmap` contains `needle`. - -Type: `UInt8`. +- 0 — If `bitmap` does not contain `needle`. [UInt8](../data-types/int-uint.md). +- 1 — If `bitmap` contains `needle`. [UInt8](../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 843f22e5a6f..7de402d2349 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -50,9 +50,7 @@ Alias: **Returned value** -- A date created from the arguments. - -Type: [Date](../../sql-reference/data-types/date.md). +- A date created from the arguments. [Date](../../sql-reference/data-types/date.md). **Example** @@ -109,9 +107,7 @@ makeDateTime(year, month, day, hour, minute, second[, timezone]) **Returned value** -- A date with time created from the arguments. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- A date with time created from the arguments. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -152,7 +148,7 @@ Alias: `TIMESTAMP` **Arguments** -- `expr` - Date or date with time. Type: [String](../../sql-reference/data-types/string.md). +- `expr` - Date or date with time. [String](../../sql-reference/data-types/string.md). - `expr_time` - Optional parameter. Time to add. 
[String](../../sql-reference/data-types/string.md). **Examples** @@ -200,9 +196,7 @@ Alias: `timezone`. **Returned value** -- Timezone. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../../sql-reference/data-types/string.md). **Example** @@ -237,9 +231,7 @@ Alias: `serverTimezone`. **Returned value** -- Timezone. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../../sql-reference/data-types/string.md). **Example** @@ -278,9 +270,7 @@ Alias: `toTimezone`. **Returned value** -- Date and time. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Date and time. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -336,9 +326,7 @@ Alias: `timezoneOf`. **Returned value** -- Timezone name. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone name. [String](../../sql-reference/data-types/string.md). **Example** @@ -373,9 +361,7 @@ Alias: `timezoneOffset`. **Returned value** -- Offset from UTC in seconds. - -Type: [Int32](../../sql-reference/data-types/int-uint.md). +- Offset from UTC in seconds. [Int32](../../sql-reference/data-types/int-uint.md). **Example** @@ -410,9 +396,7 @@ Alias: `YEAR` **Returned value** -- The year of the given date/time - -Type: `UInt16` +- The year of the given date/time. [UInt16](../data-types/int-uint.md). **Example** @@ -446,9 +430,7 @@ Alias: `QUARTER` **Returned value** -- The quarter of the year (1, 2, 3 or 4) of the given date/time - -Type: `UInt8` +- The quarter of the year (1, 2, 3 or 4) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -482,9 +464,7 @@ Alias: `MONTH` **Returned value** -- The month of the year (1 - 12) of the given date/time - -Type: `UInt8` +- The month of the year (1 - 12) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -518,9 +498,7 @@ Alias: `DAYOFYEAR` **Returned value** -- The day of the year (1 - 366) of the given date/time - -Type: `UInt16` +- The day of the year (1 - 366) of the given date/time. [UInt16](../data-types/int-uint.md). **Example** @@ -554,9 +532,7 @@ Aliases: `DAYOFMONTH`, `DAY` **Returned value** -- The day of the month (1 - 31) of the given date/time - -Type: `UInt8` +- The day of the month (1 - 31) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -643,9 +619,7 @@ Alias: `HOUR` **Returned value** -- The hour of the day (0 - 23) of the given date/time - -Type: `UInt8` +- The hour of the day (0 - 23) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -679,9 +653,7 @@ Alias: `MINUTE` **Returned value** -- The minute of the hour (0 - 59) of the given date/time - -Type: `UInt8` +- The minute of the hour (0 - 59) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -715,9 +687,7 @@ Alias: `SECOND` **Returned value** -- The second in the minute (0 - 59) of the given date/time - -Type: `UInt8` +- The second in the minute (0 - 59) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -763,9 +733,7 @@ Result: **Returned value** -- The millisecond in the minute (0 - 59) of the given date/time - -Type: `UInt16` +- The millisecond in the minute (0 - 59) of the given date/time. [UInt16](../data-types/int-uint.md). ## toUnixTimestamp @@ -782,9 +750,7 @@ toUnixTimestamp(str, [timezone]) **Returned value** -- Returns the unix timestamp. - -Type: `UInt32`. +- Returns the unix timestamp. [UInt32](../data-types/int-uint.md). 
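A minimal check of the rounding and the documented `DateTime` result (the timestamp is arbitrary sample data):

```sql
SELECT
    toStartOfTenMinutes(toDateTime('2023-04-21 10:23:00')) AS rounded, -- 2023-04-21 10:20:00
    toTypeName(rounded) AS type                                        -- 'DateTime'
```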
**Example** @@ -842,9 +808,7 @@ toStartOfYear(value) **Returned value** -- The first day of the year of the input date/time - -Type: `Date` +- The first day of the year of the input date/time. [Date](../data-types/date.md). **Example** @@ -876,9 +840,7 @@ toStartOfISOYear(value) **Returned value** -- The first day of the year of the input date/time - -Type: `Date` +- The first day of the year of the input date/time. [Date](../data-types/date.md). **Example** @@ -911,9 +873,7 @@ toStartOfQuarter(value) **Returned value** -- The first day of the quarter of the given date/time - -Type: `Date` +- The first day of the quarter of the given date/time. [Date](../data-types/date.md). **Example** @@ -945,9 +905,7 @@ toStartOfMonth(value) **Returned value** -- The first day of the month of the given date/time - -Type: `Date` +- The first day of the month of the given date/time. [Date](../data-types/date.md). **Example** @@ -985,9 +943,7 @@ Alias: `LAST_DAY` **Returned value** -- The last day of the month of the given date/time - -Type: `Date` +- The last day of the month of the given date/time=. [Date](../data-types/date.md). **Example** @@ -1019,9 +975,7 @@ toMonday(value) **Returned value** -- The date of the nearest Monday on or prior to the given date - -Type: `Date` +- The date of the nearest Monday on or prior to the given date. [Date](../data-types/date.md). **Example** @@ -1057,9 +1011,7 @@ toStartOfWeek(t[, mode[, timezone]]) **Returned value** -- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode - -Type: `Date` +- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode. [Date](../data-types/date.md). **Example** @@ -1102,9 +1054,7 @@ toLastDayOfWeek(t[, mode[, timezone]]) **Returned value** -- The date of the nearest Sunday or Monday on or after the given date, depending on the mode - -Type: `Date` +- The date of the nearest Sunday or Monday on or after the given date, depending on the mode. [Date](../data-types/date.md). **Example** @@ -1144,9 +1094,7 @@ toStartOfDay(value) **Returned value** -- The start of the day of the given date/time - -Type: `DateTime` +- The start of the day of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1178,9 +1126,7 @@ toStartOfHour(value) **Returned value** -- The start of the hour of the given date/time - -Type: `DateTime` +- The start of the hour of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1214,9 +1160,7 @@ toStartOfMinute(value) **Returned value** -- The start of the minute of the given date/time - -Type: `DateTime` +- The start of the minute of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1253,9 +1197,7 @@ toStartOfSecond(value, [timezone]) **Returned value** -- Input value without sub-seconds. - -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +- Input value without sub-seconds. [DateTime64](../../sql-reference/data-types/datetime64.md). **Examples** @@ -1309,9 +1251,7 @@ toStartOfFiveMinutes(value) **Returned value** -- The start of the five-minute interval of the given date/time - -Type: `DateTime` +- The start of the five-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1349,9 +1289,7 @@ toStartOfTenMinutes(value) **Returned value** -- The start of the ten-minute interval of the given date/time - -Type: `DateTime` +- The start of the ten-minute interval of the given date/time. [DateTime](../data-types/datetime.md). 
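For example, truncating to the hour (the unit and timestamp below are illustrative):

```sql
SELECT
    dateTrunc('hour', toDateTime('2022-03-01 12:55:55')) AS truncated, -- 2022-03-01 12:00:00
    toTypeName(truncated) AS type                                      -- 'DateTime'
```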
**Example** @@ -1389,9 +1327,7 @@ toStartOfFifteenMinutes(value) **Returned value** -- The start of the fifteen-minute interval of the given date/time - -Type: `DateTime` +- The start of the fifteen-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1603,9 +1539,7 @@ Alias: `TO_DAYS` **Returned value** -The number of days passed since date 0000-01-01. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +The number of days passed since date 0000-01-01. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -1645,9 +1579,7 @@ Alias: `FROM_DAYS` **Returned value** -The date corresponding to the number of days passed since year zero. - -Type: [Date](../../sql-reference/data-types/date.md). +The date corresponding to the number of days passed since year zero. [Date](../../sql-reference/data-types/date.md). **Example** @@ -1709,9 +1641,7 @@ age('unit', startdate, enddate, [timezone]) **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../../sql-reference/data-types/int-uint.md). **Example** @@ -1787,9 +1717,7 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../../sql-reference/data-types/int-uint.md). **Example** @@ -1858,9 +1786,7 @@ Alias: `dateTrunc`. **Returned value** -- Value, truncated to the specified part of date. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Value, truncated to the specified part of date. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -1935,9 +1861,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Returned value** -Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2012,9 +1936,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2079,9 +2001,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Returned value** -Date or date with time with the specified `value` expressed in `unit` added to `date`. 
- -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time with the specified `value` expressed in `unit` added to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2130,9 +2050,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2167,9 +2085,7 @@ addDate(date, interval) **Returned value** -Date or date with time obtained by adding `interval` to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `interval` to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2210,9 +2126,7 @@ subDate(date, interval) **Returned value** -Date or date with time obtained by subtracting `interval` from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `interval` from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2252,9 +2166,7 @@ now([timezone]) **Returned value** -- Current date and time. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -2303,9 +2215,7 @@ now64([scale], [timezone]) **Returned value** -- Current date and time with sub-second precision. - -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +- Current date and time with sub-second precision. [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2339,9 +2249,7 @@ nowInBlock([timezone]) **Returned value** -- Current date and time at the moment of processing of each block of data. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time at the moment of processing of each block of data. [DateTime](../../sql-reference/data-types/datetime.md). 
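The difference from `now()` only shows up in queries that process many blocks over a long time; on a short query the two look identical, as in this sketch:

```sql
-- now() is fixed once per query, nowInBlock() is re-evaluated per processed block
SELECT now() AS per_query, nowInBlock() AS per_block, toTypeName(nowInBlock()) AS type
FROM numbers(3);
```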
**Example** @@ -2381,9 +2289,7 @@ today() **Returned value** -- Current date - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -2491,9 +2397,7 @@ YYYYMMDDToDate(yyyymmdd); **Returned value** -- a date created from the arguments. - -Type: [Date](../../sql-reference/data-types/date.md). +- a date created from the arguments. [Date](../../sql-reference/data-types/date.md). **Example** @@ -2534,9 +2438,7 @@ YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]); **Returned value** -- a date with time created from the arguments. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- a date with time created from the arguments. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -3743,9 +3645,7 @@ dateName(date_part, date) **Returned value** -- The specified part of date. - -Type: [String](../../sql-reference/data-types/string.md#string) +- The specified part of date. [String](../../sql-reference/data-types/string.md#string) **Example** @@ -3781,9 +3681,7 @@ monthName(date) **Returned value** -- The name of the month. - -Type: [String](../../sql-reference/data-types/string.md#string) +- The name of the month. [String](../../sql-reference/data-types/string.md#string) **Example** @@ -3878,9 +3776,7 @@ toModifiedJulianDay(date) **Returned value** -- Modified Julian Day number. - -Type: [Int32](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Int32](../../sql-reference/data-types/int-uint.md). **Example** @@ -3912,9 +3808,7 @@ toModifiedJulianDayOrNull(date) **Returned value** -- Modified Julian Day number. - -Type: [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). **Example** @@ -3946,9 +3840,7 @@ fromModifiedJulianDay(day) **Returned value** -- Date in text form. - -Type: [String](../../sql-reference/data-types/string.md) +- Date in text form. [String](../../sql-reference/data-types/string.md) **Example** @@ -3980,9 +3872,7 @@ fromModifiedJulianDayOrNull(day) **Returned value** -- Date in text form. - -Type: [Nullable(String)](../../sql-reference/data-types/string.md) +- Date in text form. [Nullable(String)](../../sql-reference/data-types/string.md) **Example** diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 5f3514049c7..9fda491ac50 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -24,9 +24,7 @@ Alias: `normL1`. **Returned value** -- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. - -Type: [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). **Examples** @@ -62,9 +60,7 @@ Alias: `normL2`. **Returned value** -- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). - -Type: [Float](../../sql-reference/data-types/float.md). +- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). [Float](../../sql-reference/data-types/float.md). 
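A classic 3-4-5 triangle makes a convenient sanity check of the Euclidean norm and its `Float64` result:

```sql
SELECT L2Norm((3, 4)) AS norm, toTypeName(norm) AS type;
-- norm = 5, type = 'Float64'
```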
**Example** @@ -99,9 +95,7 @@ Alias: `normL2Squared`. **Returned value** -- L2-norm squared. - -Type: [Float](../../sql-reference/data-types/float.md). +- L2-norm squared. [Float](../../sql-reference/data-types/float.md). **Example** @@ -137,9 +131,7 @@ Alias: `normLinf`. **Returned value** -- Linf-norm or the maximum absolute value. - -Type: [Float](../../sql-reference/data-types/float.md). +- Linf-norm or the maximum absolute value. [Float](../../sql-reference/data-types/float.md). **Example** @@ -176,9 +168,7 @@ Alias: `normLp`. **Returned value** -- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm) - -Type: [Float](../../sql-reference/data-types/float.md). +- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm). [Float](../../sql-reference/data-types/float.md). **Example** @@ -215,9 +205,7 @@ Alias: `distanceL1`. **Returned value** -- 1-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- 1-norm distance. [Float](../../sql-reference/data-types/float.md). **Example** @@ -254,9 +242,7 @@ Alias: `distanceL2`. **Returned value** -- 2-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- 2-norm distance. [Float](../../sql-reference/data-types/float.md). **Example** @@ -293,7 +279,7 @@ Alias: `distanceL2Squared`. **Returned value** -Type: [Float](../../sql-reference/data-types/float.md). +- Sum of the squares of the difference between the corresponding elements of two vectors. [Float](../../sql-reference/data-types/float.md). **Example** @@ -330,9 +316,7 @@ Alias: `distanceLinf`. **Returned value** -- Infinity-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- Infinity-norm distance. [Float](../../sql-reference/data-types/float.md). **Example** @@ -370,9 +354,7 @@ Alias: `distanceLp`. **Returned value** -- p-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- p-norm distance. [Float](../../sql-reference/data-types/float.md). **Example** @@ -409,9 +391,7 @@ Alias: `normalizeL1`. **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). **Example** @@ -447,9 +427,7 @@ Alias: `normalizeL1`. **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). **Example** @@ -485,9 +463,7 @@ Alias: `normalizeLinf `. **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). **Example** @@ -524,9 +500,7 @@ Alias: `normalizeLp `. **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). **Example** @@ -561,9 +535,7 @@ cosineDistance(vector1, vector2) **Returned value** -- Cosine of the angle between two vectors subtracted from one. - -Type: [Float](../../sql-reference/data-types/float.md). +- Cosine of the angle between two vectors subtracted from one. 
[Float](../../sql-reference/data-types/float.md). **Examples** diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 4f6da764b3c..bc64fdea427 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -22,9 +22,7 @@ char(number_1, [number_2, ..., number_n]); **Returned value** -- a string of given bytes. - -Type: `String`. +- a string of given bytes. [String](../data-types/string.md). **Example** @@ -102,9 +100,7 @@ Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order str **Returned value** -- A string with the hexadecimal representation of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string with the hexadecimal representation of the argument. [String](../../sql-reference/data-types/string.md). **Examples** @@ -185,15 +181,13 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). +- `arg` — A string containing any number of hexadecimal digits. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). **Returned value** -- A binary string (BLOB). - -Type: [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../../sql-reference/data-types/string.md). **Example** @@ -251,9 +245,7 @@ Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order str **Returned value** -- A string with the binary representation of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string with the binary representation of the argument. [String](../../sql-reference/data-types/string.md). **Examples** @@ -342,9 +334,7 @@ Supports binary digits `0` and `1`. The number of binary digits does not have to **Returned value** -- A binary string (BLOB). - -Type: [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../../sql-reference/data-types/string.md). **Examples** @@ -400,9 +390,7 @@ bitPositionsToArray(arg) **Returned value** -- An array containing a list of positions of bits that equal `1`, in ascending order. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- An array containing a list of positions of bits that equal `1`, in ascending order. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). **Example** @@ -458,9 +446,7 @@ mortonEncode(args) **Returned value** -- A UInt64 code - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../../sql-reference/data-types/int-uint.md) **Example** @@ -500,9 +486,7 @@ Note: when using columns for `args` the provided `range_mask` tuple should still **Returned value** -- A UInt64 code - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. 
[UInt64](../../sql-reference/data-types/int-uint.md) **Example** @@ -621,9 +605,7 @@ mortonDecode(tuple_size, code) **Returned value** -- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. [UInt64](../../sql-reference/data-types/int-uint.md) **Example** diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 4149afce044..41657aafbbe 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -243,10 +243,8 @@ dictHas('dict_name', id_expr) **Returned value** -- 0, if there is no key. -- 1, if there is a key. - -Type: `UInt8`. +- 0, if there is no key. [UInt8](../data-types/int-uint.md). +- 1, if there is a key. [UInt8](../data-types/int-uint.md). ## dictGetHierarchy @@ -265,9 +263,7 @@ dictGetHierarchy('dict_name', key) **Returned value** -- Parents for the key. - -Type: [Array(UInt64)](../../sql-reference/data-types/array.md). +- Parents for the key. [Array(UInt64)](../../sql-reference/data-types/array.md). ## dictIsIn @@ -285,10 +281,8 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) **Returned value** -- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. -- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. - -Type: `UInt8`. +- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. [UInt8](../data-types/int-uint.md). +- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. [UInt8](../data-types/int-uint.md). ## dictGetChildren @@ -307,9 +301,7 @@ dictGetChildren(dict_name, key) **Returned values** -- First-level descendants for the key. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- First-level descendants for the key. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). **Example** @@ -357,9 +349,7 @@ dictGetDescendants(dict_name, key, level) **Returned values** -- Descendants for the key. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- Descendants for the key. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 1cd7eeb7c83..89b95888f85 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -341,9 +341,7 @@ Even in these cases, we recommend applying the function offline and pre-calculat **Returned value** -- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). - -Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). **Example** @@ -381,9 +379,7 @@ This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust **Return value** -- BLAKE3 hash as a byte array with type FixedString(32). 
- -Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- BLAKE3 hash as a byte array with type FixedString(32). [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). **Example** @@ -540,9 +536,7 @@ This is just [JavaHash](#javahash) with zeroed out sign bit. This function is us **Returned value** -A `Int32` data type hash value. - -Type: `hiveHash`. +- `hiveHash` hash value. [Int32](../data-types/int-uint.md). **Example** @@ -679,9 +673,7 @@ gccMurmurHash(par1, ...) **Returned value** -- Calculated hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -718,9 +710,7 @@ MurmurHash(par1, ...) **Returned value** -- Calculated hash value. - -Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt32](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -786,9 +776,7 @@ murmurHash3_128(expr) **Returned value** -A 128-bit `MurmurHash3` hash value. - -Type: [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `MurmurHash3` hash value. [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). **Example** @@ -822,9 +810,7 @@ xxh3(expr) **Returned value** -A 64-bit `xxh3` hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +A 64-bit `xxh3` hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -856,9 +842,11 @@ SELECT xxHash64('') **Returned value** -A `UInt32` or `UInt64` data type hash value. +- Hash value. [UInt32/64](../data-types/int-uint.md). -Type: `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. +:::note +The return type will be `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. +::: **Example** @@ -899,9 +887,7 @@ ngramSimHash(string[, ngramsize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -938,9 +924,7 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -977,9 +961,7 @@ ngramSimHashUTF8(string[, ngramsize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1016,9 +998,7 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1055,9 +1035,7 @@ wordShingleSimHash(string[, shinglesize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1094,9 +1072,7 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1133,9 +1109,7 @@ wordShingleSimHashUTF8(string[, shinglesize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value.
[UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1172,9 +1146,7 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1208,9 +1180,7 @@ wyHash64(string) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1248,9 +1218,7 @@ ngramMinHash(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1288,9 +1256,7 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1328,9 +1294,7 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1368,9 +1332,7 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1406,9 +1368,7 @@ ngramMinHashArg(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). 
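+
+A minimal usage sketch, with the optional `ngramsize` and `hashnum` arguments left at their defaults (the input string is an arbitrary example):
+
+```sql
+SELECT ngramMinHashArg('ClickHouse') AS extremes;
+-- a tuple of two tuples of n-grams: those behind the minimum and the maximum hash values
+```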
**Example** @@ -1444,9 +1404,7 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1482,9 +1440,7 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1520,9 +1476,7 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1560,9 +1514,7 @@ wordShingleMinHash(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1600,9 +1552,7 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). 
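+
+A small sketch contrasting the case-sensitive and case-insensitive variants (arbitrary sample sentence; inputs differing only in letter case are expected to collide only in the case-insensitive variant):
+
+```sql
+SELECT
+    wordShingleMinHash('ClickHouse is a column-oriented database') = wordShingleMinHash('clickhouse is a column-oriented database') AS sensitive,
+    wordShingleMinHashCaseInsensitive('ClickHouse is a column-oriented database') = wordShingleMinHashCaseInsensitive('clickhouse is a column-oriented database') AS insensitive;
+-- expected: sensitive = 0, insensitive = 1
+```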
**Example** @@ -1640,9 +1590,7 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1680,9 +1628,7 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1718,9 +1664,7 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1756,9 +1700,7 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1794,9 +1736,7 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). 
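+
+An illustrative sketch (arbitrary input; default `shinglesize` and `hashnum`):
+
+```sql
+SELECT wordShingleMinHashArgUTF8('ClickHouse is a column-oriented database management system') AS shingles;
+-- a tuple of two tuples with the word shingles behind the minimum and the maximum hashes
+```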
**Example** @@ -1832,9 +1772,7 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 1025b8bdc3d..be8a2956d41 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -40,15 +40,10 @@ addressToLine(address_of_binary_instruction) **Returned value** -- Source code filename and the line number in this file delimited by colon. - - For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. - -- Name of a binary, if the function couldn’t find the debug information. - -- Empty string, if the address is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Source code filename and the line number in this file delimited by colon. [String](../../sql-reference/data-types/string.md). + - For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. +- Name of a binary, if the function couldn’t find the debug information. [String](../../sql-reference/data-types/string.md). +- Empty string, if the address is not valid. [String](../../sql-reference/data-types/string.md). **Example** @@ -137,9 +132,7 @@ addressToLineWithInlines(address_of_binary_instruction) - Array with single element which is name of a binary, if the function couldn’t find the debug information. -- Empty array, if the address is not valid. - -Type: [Array(String)](../../sql-reference/data-types/array.md). +- Empty array, if the address is not valid. [Array(String)](../../sql-reference/data-types/array.md). **Example** @@ -236,10 +229,8 @@ addressToSymbol(address_of_binary_instruction) **Returned value** -- Symbol from ClickHouse object files. -- Empty string, if the address is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Symbol from ClickHouse object files. [String](../../sql-reference/data-types/string.md). +- Empty string, if the address is not valid. [String](../../sql-reference/data-types/string.md). **Example** @@ -333,10 +324,8 @@ demangle(symbol) **Returned value** -- Name of the C++ function. -- Empty string if a symbol is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Name of the C++ function. [String](../../sql-reference/data-types/string.md). +- Empty string if a symbol is not valid. [String](../../sql-reference/data-types/string.md). 
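+
+A typical usage sketch combines `demangle` with `addressToSymbol` over `system.trace_log` (this assumes introspection functions are enabled and the trace log is configured and populated on the server):
+
+```sql
+SET allow_introspection_functions = 1;
+
+SELECT demangle(addressToSymbol(trace[1])) AS top_frame
+FROM system.trace_log
+LIMIT 3;
+```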
**Example** diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index be20e02d77e..21beffbd0a8 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -151,9 +151,7 @@ IPv6StringToNum(string) **Returned value** -- IPv6 address in binary format. - -Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). +- IPv6 address in binary format. [FixedString(16)](../../sql-reference/data-types/fixedstring.md). **Example** @@ -313,9 +311,7 @@ toIPv6(string) **Returned value** -- IP address. - -Type: [IPv6](../../sql-reference/data-types/ipv6.md). +- IP address. [IPv6](../../sql-reference/data-types/ipv6.md). **Examples** @@ -374,9 +370,7 @@ isIPv4String(string) **Returned value** -- `1` if `string` is IPv4 address, `0` otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv4 address, `0` otherwise. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** @@ -412,9 +406,7 @@ isIPv6String(string) **Returned value** -- `1` if `string` is IPv6 address, `0` otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv6 address, `0` otherwise. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** @@ -454,9 +446,7 @@ This function accepts both IPv4 and IPv6 addresses (and networks) represented as **Returned value** -- `1` or `0`. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` or `0`. [UInt8](../../sql-reference/data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index e920ab82988..fa02dca07db 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -533,9 +533,7 @@ JSONExtractKeys(json[, a, b, c...]) **Returned value** -Array with the keys of the JSON. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Array with the keys of the JSON. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). **Example** @@ -595,10 +593,8 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Returned values** -- Array with `('key', 'value')` tuples. Both tuple members are strings. -- Empty array if the requested object does not exist, or input JSON is invalid. - -Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Array with `('key', 'value')` tuples. Both tuple members are strings. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Empty array if the requested object does not exist, or input JSON is invalid. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). **Examples** @@ -739,9 +735,7 @@ toJSONString(value) **Returned value** -- JSON representation of the value. - -Type: [String](../../sql-reference/data-types/string.md). +- JSON representation of the value. [String](../../sql-reference/data-types/string.md). 
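+
+For instance (arbitrary sample values; the result is a single JSON string, not a JSON column):
+
+```sql
+SELECT toJSONString([1, 2, 3]) AS arr, toJSONString(map('a', 1)) AS obj;
+-- arr = '[1,2,3]', obj = '{"a":1}'
+```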
**Example** @@ -786,9 +780,7 @@ Alias: `JSON_ARRAY_LENGTH(json)`. **Returned value** -- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. - -Type: [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md). +- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md). **Example** @@ -819,9 +811,7 @@ jsonMergePatch(json1, json2, ...) **Returned value** -- If JSON object strings are valid, return the merged JSON object string. - -Type: [String](../../sql-reference/data-types/string.md). +- If JSON object strings are valid, return the merged JSON object string. [String](../../sql-reference/data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 945166056af..eb0de410f28 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -842,9 +842,7 @@ degrees(x) **Returned value** -- Value in degrees. - -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +- Value in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 11ee471d709..2b4f888d06f 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -27,9 +27,7 @@ getMacro(name); **Returned value** -- Value of the specified macro. - -Type: [String](../../sql-reference/data-types/string.md). +- Value of the specified macro. [String](../../sql-reference/data-types/string.md). **Example** @@ -82,9 +80,7 @@ This function is case-insensitive. **Returned value** -- String with the fully qualified domain name. - -Type: `String`. +- String with the fully qualified domain name. [String](../data-types/string.md). **Example** @@ -207,9 +203,7 @@ byteSize(argument [, ...]) **Returned value** -- Estimation of byte size of the arguments in memory. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Estimation of byte size of the arguments in memory. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -409,10 +403,8 @@ Aliases: `user()`, `USER()`, `current_user()`. Aliases are case insensitive. **Returned values** -- The name of the current user. -- In distributed queries, the login of the user who initiated the query. - -Type: `String`. +- The name of the current user. [String](../data-types/string.md). +- In distributed queries, the login of the user who initiated the query. [String](../data-types/string.md). **Example** @@ -448,10 +440,8 @@ isConstant(x) **Returned values** -- `1` if `x` is constant. -- `0` if `x` is non-constant. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `x` is constant. [UInt8](../../sql-reference/data-types/int-uint.md). +- `0` if `x` is non-constant. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** @@ -517,8 +507,8 @@ ifNotFinite(x,y) **Arguments** -- `x` — Value to check for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). +- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). +- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). 
**Returned value** @@ -924,9 +914,7 @@ uptime() **Returned value** -- Time value of seconds. - -Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Time value of seconds. [UInt32](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -971,7 +959,7 @@ None. **Returned value** -Type: [String](../data-types/string) +- Current version of ClickHouse. [String](../data-types/string.md). **Implementation details** @@ -1041,7 +1029,9 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st - Value of `column` with `offset` distance from current row, if `offset` is not outside the block boundaries. - The default value of `column` or `default_value` (if given), if `offset` is outside the block boundaries. -Type: type of data blocks affected or default value type. +:::note +The return type will be that of the data blocks affected or the default value type. +::: **Example** @@ -1238,9 +1228,7 @@ runningConcurrency(start, end) **Returned values** -- The number of concurrent events at each event start time. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md) +- The number of concurrent events at each event start time. [UInt32](../../sql-reference/data-types/int-uint.md) **Example** @@ -1535,7 +1523,7 @@ SELECT * FROM table WHERE indexHint() **Returned value** -Type: [Uint8](https://clickhouse.com/docs/en/data_types/int_uint/#diapazony-uint). +- `1`. [UInt8](../data-types/int-uint.md). **Example** @@ -1638,9 +1626,7 @@ SELECT replicate(x, arr); **Returned value** -An array of the lame length as `arr` filled with value `x`. - -Type: `Array`. +An array of the same length as `arr` filled with value `x`. [Array](../data-types/array.md). **Example** @@ -1670,9 +1656,7 @@ filesystemAvailable() **Returned value** -- The amount of remaining space available in bytes. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of remaining space available in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). **Example** @@ -1702,9 +1686,7 @@ filesystemFree() **Returned value** -- The amount of free space in bytes. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of free space in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). **Example** @@ -1734,9 +1716,7 @@ filesystemCapacity() **Returned value** -- Capacity of the filesystem in bytes. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Capacity of the filesystem in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). **Example** @@ -1847,7 +1827,9 @@ finalizeAggregation(state) - Value/values that was aggregated. -Type: Value of any types that was aggregated. +:::note +The return type matches that of the aggregated value(s). +::: **Examples** @@ -2284,9 +2266,7 @@ countDigits(x) **Returned value** -Number of digits. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +Number of digits. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). :::note For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). ::: **Example** @@ -2310,9 +2290,7 @@ Result: ## errorCodeToName -Returns the textual name of an error code.
- -Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). +Returns the textual name of an error code. [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). **Syntax** @@ -2343,9 +2321,7 @@ tcpPort() **Returned value** -- The TCP port number. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The TCP port number. [UInt16](../../sql-reference/data-types/int-uint.md). **Example** @@ -2381,9 +2357,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the current user settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## enabledProfiles @@ -2397,9 +2371,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## defaultProfiles @@ -2413,9 +2385,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## currentRoles @@ -2429,9 +2399,7 @@ currentRoles() **Returned value** -- A list of the current roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- A list of the current roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## enabledRoles @@ -2445,9 +2413,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## defaultRoles @@ -2461,9 +2427,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## getServerPort @@ -2492,9 +2456,7 @@ getServerPort(port_name) **Returned value** -- The number of the server port. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The number of the server port. [UInt16](../../sql-reference/data-types/int-uint.md). **Example** @@ -2526,9 +2488,7 @@ queryID() **Returned value** -- The ID of the current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the current query. [String](../../sql-reference/data-types/string.md) **Example** @@ -2562,9 +2522,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the initial current query. 
[String](../../sql-reference/data-types/string.md) **Example** @@ -2597,9 +2555,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Shard index or constant `0`. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -2639,9 +2595,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Total number of shards or `0`. [UInt32](../../sql-reference/data-types/int-uint.md). **See Also** @@ -2663,9 +2617,7 @@ getOSKernelVersion() **Returned value** -- The current OS kernel version. - -Type: [String](../../sql-reference/data-types/string.md). +- The current OS kernel version. [String](../../sql-reference/data-types/string.md). **Example** @@ -2699,9 +2651,7 @@ zookeeperSessionUptime() **Returned value** -- Uptime of the current ZooKeeper session in seconds. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Uptime of the current ZooKeeper session in seconds. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -2738,9 +2688,7 @@ All arguments must be constant. **Returned value** -- Randomly generated table structure. - -Type: [String](../../sql-reference/data-types/string.md). +- Randomly generated table structure. [String](../../sql-reference/data-types/string.md). **Examples** @@ -2807,9 +2755,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema - -Type: [String](../../sql-reference/data-types/string.md). +- CapnProto schema. [String](../../sql-reference/data-types/string.md). **Examples** @@ -2908,9 +2854,7 @@ structureToProtobufSchema(structure) **Returned value** -- Protobuf schema - -Type: [String](../../sql-reference/data-types/string.md). +- Protobuf schema. [String](../../sql-reference/data-types/string.md). **Examples** diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 2d7752ed022..a7866c6d12e 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -204,9 +204,7 @@ randNormal(mean, variance) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -243,9 +241,7 @@ randLogNormal(mean, variance) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -282,9 +278,7 @@ randBinomial(experiments, probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -321,9 +315,7 @@ randNegativeBinomial(experiments, probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -359,9 +351,7 @@ randPoisson(n) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -397,9 +387,7 @@ randBernoulli(probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. 
[UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -435,9 +423,7 @@ randExponential(lambda) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -473,9 +459,7 @@ randChiSquared(degree_of_freedom) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -511,9 +495,7 @@ randStudentT(degree_of_freedom) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -550,9 +532,7 @@ randFisherF(d1, d2) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -588,9 +568,7 @@ randomString(length) **Returned value** -- String filled with random bytes. - -Type: [String](../../sql-reference/data-types/string.md). +- String filled with random bytes. [String](../../sql-reference/data-types/string.md). **Example** @@ -630,9 +608,7 @@ randomFixedString(length); **Returned value(s)** -- String filled with random bytes. - -Type: [FixedString](../../sql-reference/data-types/fixedstring.md). +- String filled with random bytes. [FixedString](../../sql-reference/data-types/fixedstring.md). **Example** @@ -667,9 +643,7 @@ randomPrintableASCII(length) **Returned value** -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. - -Type: [String](../../sql-reference/data-types/string.md) +- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [String](../../sql-reference/data-types/string.md) **Example** @@ -701,9 +675,7 @@ randomStringUTF8(length); **Returned value(s)** -- UTF-8 random string. - -Type: [String](../../sql-reference/data-types/string.md). +- UTF-8 random string. [String](../../sql-reference/data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index afec43cd6f4..6cbcc4e4ef3 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -336,7 +336,7 @@ roundAge(num) - Returns `45`, for $45 \leq age \leq 54$. - Returns `55`, for $age \geq 55$. -Type: [UInt8](../data-types/int-uint.md). +Type: [UInt8](../data-types/int-uint.md) in all cases. **Example** diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 8e50637cf30..77563713605 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -25,13 +25,15 @@ splitByChar(separator, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + +:::note + Empty substrings may be selected when: - A separator occurs at the beginning or end of the string; - There are multiple consecutive separators; - The original string `s` is empty. 
- -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +::: :::note The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings > 0` meant that `max_substring`-many splits were performed and that the remainder of the string was returned as the final element of the list. @@ -76,15 +78,17 @@ splitByString(separator, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +:::note +Empty substrings may be selected when: - A non-empty separator occurs at the beginning or end of the string; - There are multiple consecutive non-empty separators; - The original string `s` is empty while the separator is not empty. Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -131,15 +135,17 @@ splitByRegexp(regexp, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + +:::note +Empty substrings may be selected when: - A non-empty regular expression match occurs at the beginning or end of the string; - There are multiple consecutive non-empty regular expression matches; - The original string `s` is empty while the regular expression is not empty. -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -186,11 +192,11 @@ splitByWhitespace(s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -225,11 +231,11 @@ splitByNonAlpha(s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). 
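+
+For instance, a minimal sketch (runs of whitespace and punctuation are treated as separators, so the expected result contains no empty substrings):
+
+```sql
+SELECT splitByNonAlpha('  1!  a,  b.  ') AS tokens;
+-- expected: ['1', 'a', 'b']
+```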
+:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -287,11 +293,11 @@ Alias: `splitByAlpha` **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -322,11 +328,8 @@ extractAllGroups(text, regexp) **Returned values** -- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). - -- If there is no matching group, returns an empty array. - -Type: [Array](../data-types/array.md). +- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). [Array](../data-types/array.md). +- If there is no matching group, returns an empty array. [Array](../data-types/array.md). **Example** @@ -359,9 +362,7 @@ ngrams(string, ngramsize) **Returned values** -- Array with n-grams. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- Array with n-grams. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). **Example** @@ -387,9 +388,7 @@ Splits a string into tokens using non-alphanumeric ASCII characters as separator **Returned value** -- The resulting array of tokens from input string. - -Type: [Array](../data-types/array.md). +- The resulting array of tokens from input string. [Array](../data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index ba23870a584..f45ceb99617 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -30,9 +30,7 @@ empty(x) **Returned value** -- Returns `1` for an empty string or `0` for a non-empty string. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty string or `0` for a non-empty string. [UInt8](../data-types/int-uint.md). **Example** @@ -68,9 +66,7 @@ notEmpty(x) **Returned value** -- Returns `1` for a non-empty string or `0` for an empty string string. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty string or `0` for an empty string. [UInt8](../data-types/int-uint.md). **Example** @@ -289,9 +285,7 @@ Alias: `LPAD` **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -325,9 +319,7 @@ leftPadUTF8(string, length[, pad_string]) **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md).
+- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -457,9 +449,7 @@ Alias: `RPAD` **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A right-padded string of the given length. [String](../data-types/string.md). **Example** @@ -493,9 +483,7 @@ rightPadUTF8(string, length[, pad_string]) **Returned value** -- A right-padded string of the given length. - -Type: [String](../data-types/string.md). +- A right-padded string of the given length. [String](../data-types/string.md). **Example** @@ -676,9 +664,7 @@ Alias: `REPEAT` **Returned value** -A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string. - -Type: `String`. +A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string. [String](../data-types/string.md). **Example** @@ -712,9 +698,7 @@ Alias: `SPACE`. **Returned value** -The string containing string ` ` repeated `n` times. If `n` <= 0, the function returns the empty string. - -Type: `String`. +A string containing the space character ` ` repeated `n` times. If `n` <= 0, the function returns the empty string. [String](../data-types/string.md). **Example** @@ -913,9 +897,7 @@ Alias: **Returned value** -A substring of `s` with `length` many bytes, starting at index `offset`. - -Type: `String`. +A substring of `s` with `length` many bytes, starting at index `offset`. [String](../data-types/string.md). **Example** @@ -1072,9 +1054,7 @@ base58Encode(plaintext) **Returned value** -- A string containing the encoded value of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string containing the encoded value of the argument. [String](../../sql-reference/data-types/string.md). **Example** @@ -1106,9 +1086,7 @@ base58Decode(encoded) **Returned value** -- A string containing the decoded value of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string containing the decoded value of the argument. [String](../data-types/string.md). **Example** @@ -1284,9 +1262,7 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) **Returned value** -A string without leading and/or trailing specified characters. - -Type: `String`. +A string without leading and/or trailing specified characters. [String](../data-types/string.md). **Example** @@ -1320,9 +1296,7 @@ Alias: `ltrim(input_string)`. **Returned value** -A string without leading common whitespaces. - -Type: `String`. +A string without leading common whitespaces. [String](../data-types/string.md). **Example** @@ -1356,9 +1330,7 @@ Alias: `rtrim(input_string)`. **Returned value** -A string without trailing common whitespaces. - -Type: `String`. +A string without trailing common whitespaces. [String](../data-types/string.md). **Example** @@ -1392,9 +1364,7 @@ Alias: `trim(input_string)`. **Returned value** -A string without leading and trailing common whitespaces. - -Type: `String`. +A string without leading and trailing common whitespaces. [String](../data-types/string.md). **Example** @@ -1444,9 +1414,7 @@ normalizeQuery(x) **Returned value** -- Sequence of characters with placeholders. - -Type: [String](../../sql-reference/data-types/string.md). +- Sequence of characters with placeholders. [String](../../sql-reference/data-types/string.md). **Example** @@ -1478,9 +1446,7 @@ normalizedQueryHash(x) **Returned value** -- Hash value. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
+- Hash value. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). **Example** @@ -1512,9 +1478,7 @@ normalizeUTF8NFC(words) **Returned value** -- String transformed to NFC normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFC normalization form. [String](../../sql-reference/data-types/string.md). **Example** @@ -1546,9 +1510,7 @@ normalizeUTF8NFD(words) **Returned value** -- String transformed to NFD normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFD normalization form. [String](../../sql-reference/data-types/string.md). **Example** @@ -1580,9 +1542,7 @@ normalizeUTF8NFKC(words) **Returned value** -- String transformed to NFKC normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFKC normalization form. [String](../../sql-reference/data-types/string.md). **Example** @@ -1614,9 +1574,7 @@ normalizeUTF8NFKD(words) **Returned value** -- String transformed to NFKD normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFKD normalization form. [String](../../sql-reference/data-types/string.md). **Example** @@ -1651,9 +1609,7 @@ encodeXMLComponent(x) **Returned value** -- The escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The escaped string. [String](../../sql-reference/data-types/string.md). **Example** @@ -1691,9 +1647,7 @@ decodeXMLComponent(x) **Returned value** -- The un-escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../../sql-reference/data-types/string.md). **Example** @@ -1727,9 +1681,7 @@ decodeHTMLComponent(x) **Returned value** -- The un-escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../../sql-reference/data-types/string.md). **Example** @@ -1782,9 +1734,7 @@ extractTextFromHTML(x) **Returned value** -- Extracted text. - -Type: [String](../../sql-reference/data-types/string.md). +- Extracted text. [String](../../sql-reference/data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 9738c19bf3c..327eb8994db 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -896,14 +896,16 @@ extractAllGroupsHorizontal(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../../sql-reference/data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../../sql-reference/data-types/array.md). +:::note If `haystack` does not match the `pattern` regex, an array of empty arrays is returned. 
+::: **Example** @@ -931,14 +933,16 @@ extractAllGroupsVertical(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../../sql-reference/data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../../sql-reference/data-types/array.md). +:::note If `haystack` does not match the `pattern` regex, an empty array is returned. +::: **Example** @@ -1340,9 +1344,7 @@ countSubstrings(haystack, needle[, start_pos]) **Returned values** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1389,9 +1391,7 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos]) **Returned values** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1443,9 +1443,7 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) **Returned values** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1496,9 +1494,7 @@ countMatches(haystack, pattern) **Returned value** -- The number of matches. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1543,9 +1539,7 @@ countMatchesCaseInsensitive(haystack, pattern) **Returned value** -- The number of matches. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1583,9 +1577,7 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`. **Returned values** -`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. - -Type: `String`. +`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. [String](../data-types/string.md). **Examples** @@ -1624,10 +1616,8 @@ hasSubsequence(haystack, needle) **Returned values** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). +- 0, otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -1662,10 +1652,8 @@ hasSubsequenceCaseInsensitive(haystack, needle) **Returned values** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). +- 0, otherwise. [UInt8](../data-types/int-uint.md). 
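+
+For a quick illustration, consider the following query (the literals here are invented for this note, not taken from the patch):
+
+```sql
+SELECT hasSubsequenceCaseInsensitive('GARBAGE', 'arg');
+```
+
+Since `a`, `r`, `g` occur in that order in `GARBAGE` when case is ignored, this should return `1`.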
 **Examples**

@@ -1700,10 +1688,8 @@ hasSubsequenceUTF8(haystack, needle)

 **Returned values**

-- 1, if needle is a subsequence of haystack.
-- 0, otherwise.
-
-Type: `UInt8`.
+- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md).
+- 0, otherwise. [UInt8](../data-types/int-uint.md).

 Query:

@@ -1738,10 +1724,8 @@ hasSubsequenceCaseInsensitiveUTF8(haystack, needle)

 **Returned values**

-- 1, if needle is a subsequence of haystack.
-- 0, otherwise.
-
-Type: `UInt8`.
+- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md).
+- 0, otherwise. [UInt8](../data-types/int-uint.md).

 **Examples**

diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md
index e80a3fa9860..beb7a0503b9 100644
--- a/docs/en/sql-reference/functions/time-series-functions.md
+++ b/docs/en/sql-reference/functions/time-series-functions.md
@@ -30,9 +30,7 @@ At least four data points are required in `series` to detect outliers.

 **Returned value**

-- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly.
-
-Type: [Array](../../sql-reference/data-types/array.md).
+- Returns an array of the same length as the input array, where each value represents the anomaly score of the corresponding element in the series. A non-zero score indicates a possible anomaly. [Array](../../sql-reference/data-types/array.md).

 **Examples**

@@ -81,10 +79,8 @@ seriesPeriodDetectFFT(series);

 **Returned value**

-- A real value equal to the period of series data
-- Returns NAN when number of data points are less than four.
-
-Type: [Float64](../../sql-reference/data-types/float.md).
+- A real value equal to the period of the series data. [Float64](../../sql-reference/data-types/float.md).
+- Returns NaN when the number of data points is less than four. [NaN](../../sql-reference/data-types/float.md#nan-and-inf).

 **Examples**

@@ -134,9 +130,7 @@ The number of data points in `series` should be at least twice the value of `per

 **Returned value**

 - An array of four arrays where the first array include seasonal components, the second array - trend,
-the third array - residue component, and the fourth array - baseline(seasonal + trend) component.
-
-Type: [Array](../../sql-reference/data-types/array.md).
+the third array - residue component, and the fourth array - baseline (seasonal + trend) component. [Array](../../sql-reference/data-types/array.md).

 **Examples**

diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md
index d8f23c92e61..2b5f093c149 100644
--- a/docs/en/sql-reference/functions/time-window-functions.md
+++ b/docs/en/sql-reference/functions/time-window-functions.md
@@ -23,9 +23,7 @@ tumble(time_attr, interval [, timezone])

 **Returned values**

-- The inclusive lower and exclusive upper bound of the corresponding tumbling window.
-
-Type: `Tuple(DateTime, DateTime)`
+- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).

 **Example**
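+
+A minimal sketch of a call (the timestamp and interval are arbitrary illustrative values):
+
+```sql
+SELECT tumble(toDateTime('2024-01-01 00:00:10'), toIntervalSecond(30));
+```
+
+This should return the half-open 30-second window containing the timestamp, here `('2024-01-01 00:00:00','2024-01-01 00:00:30')`.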

@@ -60,9 +58,7 @@ hop(time_attr, hop_interval, window_interval [, timezone])
 ```

 **Returned values**

-- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
-
-Type: `Tuple(DateTime, DateTime)`
+- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when the hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).

 **Example**

diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md
index 64b1732597f..cfedc01ce8f 100644
--- a/docs/en/sql-reference/functions/tuple-functions.md
+++ b/docs/en/sql-reference/functions/tuple-functions.md
@@ -134,7 +134,9 @@ Tuples should have the same type of the elements.

 - The Hamming distance.

-Type: The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples.
+:::note
+The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples.
+:::

 ``` sql
 SELECT
@@ -200,9 +202,7 @@ tupleToNameValuePairs(tuple)

 **Returned value**

-- An array with (name, value) pairs.
-
-Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
+- An array with (name, value) pairs. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).

 **Example**

@@ -278,9 +278,7 @@ Alias: `vectorSum`.

 **Returned value**

-- Tuple with the sum.
-
-Type: [Tuple](../../sql-reference/data-types/tuple.md).
+- Tuple with the sum. [Tuple](../../sql-reference/data-types/tuple.md).

 **Example**

@@ -317,9 +315,7 @@ Alias: `vectorDifference`.

 **Returned value**

-- Tuple with the result of subtraction.
-
-Type: [Tuple](../../sql-reference/data-types/tuple.md).
+- Tuple with the result of subtraction. [Tuple](../../sql-reference/data-types/tuple.md).

 **Example**

@@ -354,9 +350,7 @@ tupleMultiply(tuple1, tuple2)

 **Returned value**

-- Tuple with the multiplication.
-
-Type: [Tuple](../../sql-reference/data-types/tuple.md).
+- Tuple with the multiplication. [Tuple](../../sql-reference/data-types/tuple.md).

 **Example**

@@ -391,9 +385,7 @@ tupleDivide(tuple1, tuple2)

 **Returned value**

-- Tuple with the result of division.
-
-Type: [Tuple](../../sql-reference/data-types/tuple.md).
+- Tuple with the result of division. [Tuple](../../sql-reference/data-types/tuple.md).

 **Example**

@@ -427,9 +419,7 @@ tupleNegate(tuple)

 **Returned value**

-- Tuple with the result of negation.
-
-Type: [Tuple](../../sql-reference/data-types/tuple.md).
+- Tuple with the result of negation. [Tuple](../../sql-reference/data-types/tuple.md).

 **Example**

@@ -464,9 +454,7 @@ tupleMultiplyByNumber(tuple, number)

 **Returned value**

-- Tuple with multiplied values.
-
-Type: [Tuple](../../sql-reference/data-types/tuple.md).
+- Tuple with multiplied values. [Tuple](../../sql-reference/data-types/tuple.md).

 **Example**

@@ -501,9 +489,7 @@ tupleDivideByNumber(tuple, number)

 **Returned value**

-- Tuple with divided values.
-
-Type: [Tuple](../../sql-reference/data-types/tuple.md).
+- Tuple with divided values. [Tuple](../../sql-reference/data-types/tuple.md). **Example** diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 377283bc006..9468228c737 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -21,9 +21,7 @@ map(key1, value1[, key2, value2, ...]) **Returned value** -- Data structure as `key:value` pairs. - -Type: [Map(key, value)](../../sql-reference/data-types/map.md). +- Data structure as `key:value` pairs. [Map(key, value)](../../sql-reference/data-types/map.md). **Examples** @@ -387,9 +385,7 @@ mapContains(map, key) **Returned value** -- `1` if `map` contains `key`, `0` if not. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `map` contains `key`, `0` if not. [UInt8](../../sql-reference/data-types/int-uint.md). **Example** @@ -431,9 +427,7 @@ mapKeys(map) **Returned value** -- Array containing all keys from the `map`. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing all keys from the `map`. [Array](../../sql-reference/data-types/array.md). **Example** @@ -474,9 +468,7 @@ mapValues(map) **Returned value** -- Array containing all the values from `map`. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing all the values from `map`. [Array](../../sql-reference/data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ea08ffa50e7..f1c2e92f201 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -631,9 +631,7 @@ toDateTime64(expr, scale, [timezone]) **Returned value** -- A calendar date and time of day, with sub-second precision. - -Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). +- A calendar date and time of day, with sub-second precision. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). **Example** @@ -1749,9 +1747,7 @@ toLowCardinality(expr) **Returned values** -- Result of `expr`. - -Type: `LowCardinality(expr_result_type)` +- Result of `expr`. [LowCardinality](../data-types/lowcardinality.md) of the type of `expr`. **Example** diff --git a/docs/en/sql-reference/functions/ulid-functions.md b/docs/en/sql-reference/functions/ulid-functions.md index eb69b1779ae..b4e3fc2d164 100644 --- a/docs/en/sql-reference/functions/ulid-functions.md +++ b/docs/en/sql-reference/functions/ulid-functions.md @@ -65,9 +65,7 @@ ULIDStringToDateTime(ulid[, timezone]) **Returned value** -- Timestamp with milliseconds precision. - -Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). **Usage example** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index a0b0170721c..52eeb539ef4 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -28,7 +28,7 @@ domain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -48,10 +48,8 @@ clickhouse.com **Returned values** -- Host name. 
If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse can’t parse the input string as a URL. - -Type: `String`. +- Host name. If ClickHouse can parse the input string as a URL. [String](../data-types/string.md). +- Empty string. If ClickHouse can’t parse the input string as a URL. [String](../data-types/string.md). **Example** @@ -79,7 +77,7 @@ topLevelDomain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -91,10 +89,8 @@ https://clickhouse.com/time/ **Returned values** -- Domain name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse cannot parse the input string as a URL. - -Type: `String`. +- Domain name. If ClickHouse can parse the input string as a URL. [String](../../sql-reference/data-types/string.md). +- Empty string. If ClickHouse cannot parse the input string as a URL. [String](../../sql-reference/data-types/string.md). **Example** @@ -162,9 +158,7 @@ cutToFirstSignificantSubdomain(URL, TLD) **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain. - -Type: [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../../sql-reference/data-types/string.md). **Example** @@ -216,9 +210,7 @@ cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. - -Type: [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../../sql-reference/data-types/string.md). **Example** @@ -270,9 +262,7 @@ firstSignificantSubdomainCustom(URL, TLD) **Returned value** -- First significant subdomain. - -Type: [String](../../sql-reference/data-types/string.md). +- First significant subdomain. [String](../../sql-reference/data-types/string.md). **Example** @@ -422,9 +412,7 @@ netloc(URL) **Returned value** -- `username:password@host:port`. - -Type: `String`. +- `username:password@host:port`. [String](../data-types/string.md). **Example** @@ -479,9 +467,7 @@ cutURLParameter(URL, name) **Returned value** -- URL with `name` URL parameter removed. - -Type: `String`. +- URL with `name` URL parameter removed. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index d1b833c2439..0c1da88913d 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -289,9 +289,7 @@ The function also works for [Arrays](array-functions.md#function-empty) and [Str **Returned value** -- Returns `1` for an empty UUID or `0` for a non-empty UUID. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty UUID or `0` for a non-empty UUID. [UInt8](../data-types/int-uint.md). **Example** @@ -331,9 +329,7 @@ The function also works for [Arrays](array-functions.md#function-notempty) or [S **Returned value** -- Returns `1` for a non-empty UUID or `0` for an empty UUID. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty UUID or `0` for an empty UUID. [UInt8](../data-types/int-uint.md). 
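+
+As a quick check (an illustrative query, not part of this patch):
+
+```sql
+SELECT notEmpty(generateUUIDv4()), notEmpty(toUUID('00000000-0000-0000-0000-000000000000'));
+```
+
+The first call should return `1` and the second `0`, since the all-zero UUID counts as empty. A fuller example follows below.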
 **Example**

From 508b0356543fc3a49e069166093147b3089ed29a Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Thu, 23 May 2024 14:08:48 +0000
Subject: [PATCH 554/651] Move is NaN from other-functions to arithmetic
 functions

---
 .../en/sql-reference/functions/arithmetic-functions.md | 10 ++++++++++
 docs/en/sql-reference/functions/other-functions.md     | 10 ----------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md
index 8b8527acfdf..7b079152907 100644
--- a/docs/en/sql-reference/functions/arithmetic-functions.md
+++ b/docs/en/sql-reference/functions/arithmetic-functions.md
@@ -194,6 +194,16 @@ Result:

 You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`.

+## isNaN
+
+Returns 1 if the Float32 or Float64 argument is NaN, otherwise it returns 0.
+
+**Syntax**
+
+```sql
+isNaN(x)
+```
+
 ## modulo

 Calculates the remainder of the division of two values `a` by `b`.

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 79c0148d704..c16e8af1ef0 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -541,16 +541,6 @@ Result:
 └────────────────────┘
 ```

-## isNaN
-
-Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0.
-
-**Syntax**
-
-```sql
-isNaN(x)
-```
-
 ## hasColumnInTable

 Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0.

From 8df4da5efaa014f7866288e1aac799f40f52a8c2 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 23 May 2024 14:21:38 +0000
Subject: [PATCH 555/651] Print query in explain plan with parallel replicas

---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 4 ++++
 src/Processors/QueryPlan/ReadFromRemote.cpp    | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 4bbda982f5b..13e6fa87051 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -403,6 +403,10 @@ void executeQueryWithParallelReplicas(
     ContextPtr context,
     std::shared_ptr storage_limits)
 {
+    auto logger = getLogger("executeQueryWithParallelReplicas");
+    LOG_DEBUG(logger, "Executing read from {}, header {}, query ({}), stage {} with parallel replicas",
+        storage_id.getNameForLogs(), header.dumpStructure(), query_ast->formatForLogging(), processed_stage);
+
     const auto & settings = context->getSettingsRef();

     /// check cluster for parallel replicas

diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp
index b4e35af85d6..84c2515e8ca 100644
--- a/src/Processors/QueryPlan/ReadFromRemote.cpp
+++ b/src/Processors/QueryPlan/ReadFromRemote.cpp
@@ -386,6 +386,8 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
     chassert(cluster->getShardCount() == 1);

     std::vector description;
+    description.push_back(fmt::format("query: {}", formattedAST(query_ast)));
+
     for (const auto & pool : cluster->getShardsInfo().front().per_replica_pools)
         description.push_back(fmt::format("Replica: {}", pool->getHost()));

From 71ce01404ddb4bf26f88d910452e70bb4a27a842 Mon Sep 17 00:00:00 2001
From: Dmitry Novik
Date: Thu,
23 May 2024 16:34:52 +0200 Subject: [PATCH 556/651] Fix validation --- src/Analyzer/ValidationUtils.cpp | 3 +++ src/Planner/PlannerExpressionAnalysis.cpp | 24 ++++------------------- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index 9e977964755..59157838edf 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -276,6 +276,9 @@ void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidatio if (query_node_typed.hasOrderBy()) validate_group_by_columns_visitor.visit(query_node_typed.getOrderByNode()); + if (query_node_typed.hasInterpolate()) + validate_group_by_columns_visitor.visit(query_node_typed.getInterpolate()); + validate_group_by_columns_visitor.visit(query_node_typed.getProjectionNode()); } diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 399bbfc67cf..1cdff0a26aa 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -441,30 +441,20 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, auto & interpolate_list_node = query_node.getInterpolate()->as(); PlannerActionsVisitor interpolate_actions_visitor(planner_context); - auto interpolate_expression_dag = std::make_shared(); + auto interpolate_actions_dag = std::make_shared(); for (auto & interpolate_node : interpolate_list_node.getNodes()) { auto & interpolate_node_typed = interpolate_node->as(); - interpolate_actions_visitor.visit(interpolate_expression_dag, interpolate_node_typed.getInterpolateExpression()); + interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); + interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); } std::unordered_map before_sort_actions_inputs_name_to_node; for (const auto & node : before_sort_actions->getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); - std::unordered_set aggregation_keys; - - auto projection_expression_dag = std::make_shared(); - for (const auto & node : query_node.getProjection()) - actions_visitor.visit(projection_expression_dag, node); - for (const auto & node : projection_expression_dag->getNodes()) - aggregation_keys.insert(node.result_name); - - if (aggregation_analysis_result_optional) - aggregation_keys.insert(aggregation_analysis_result_optional->aggregation_keys.begin(), aggregation_analysis_result_optional->aggregation_keys.end()); - - for (const auto & node : interpolate_expression_dag->getNodes()) + for (const auto & node : interpolate_actions_dag->getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || node.type != ActionsDAG::ActionType::INPUT) @@ -479,12 +469,6 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, input_node_it = it; } - if (aggregation_analysis_result_optional) - if (!aggregation_keys.contains(node.result_name)) - throw Exception(ErrorCodes::NOT_AN_AGGREGATE, - "Column {} is not under aggregate function and not in GROUP BY keys. 
In query {}", - node.result_name, query_node.formatASTForErrorMessage()); - before_sort_actions_outputs.push_back(input_node_it->second); before_sort_actions_dag_output_node_names.insert(node.result_name); } From 21f831da0d823b9f00b02100bedb847d7af6720e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 23 May 2024 16:36:11 +0200 Subject: [PATCH 557/651] Remove unneeded changes --- src/Planner/PlannerExpressionAnalysis.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 1cdff0a26aa..6e194b2c03e 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -28,7 +28,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int NOT_AN_AGGREGATE; } namespace @@ -398,8 +397,7 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, SortAnalysisResult analyzeSort(const QueryNode & query_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, - ActionsChain & actions_chain, - std::optional aggregation_analysis_result_optional) + ActionsChain & actions_chain) { ActionsDAGPtr before_sort_actions = std::make_shared(input_columns); auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); @@ -570,7 +568,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo std::optional sort_analysis_result_optional; if (query_node.hasOrderBy()) { - sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain, aggregation_analysis_result_optional); + sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain); current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); } From 47578772e4558ec044b676e13f5be6ae89d6c49f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 16:39:16 +0200 Subject: [PATCH 558/651] Fix hdfs assertion --- .../ObjectStorage/Azure/Configuration.h | 2 +- .../ObjectStorage/HDFS/Configuration.h | 2 +- .../ObjectStorage/ReadBufferIterator.cpp | 6 ++--- .../ObjectStorage/S3/Configuration.cpp | 2 +- src/Storages/ObjectStorage/S3/Configuration.h | 2 +- .../ObjectStorage/StorageObjectStorage.h | 2 +- .../StorageObjectStorageSource.cpp | 23 +++++++++++++++---- .../StorageObjectStorageSource.h | 6 +++++ 8 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 19b9cf56f93..35b19079ca9 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -36,7 +36,7 @@ public: void setPaths(const Paths & paths) override { blobs_paths = paths; } String getNamespace() const override { return container; } - String getDataSourceDescription() override { return std::filesystem::path(connection_url) / container; } + String getDataSourceDescription() const override { return std::filesystem::path(connection_url) / container; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index dc06e754c44..01a8b9c5e3b 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -31,7 +31,7 @@ public: 
std::string getPathWithoutGlobs() const override; String getNamespace() const override { return ""; } - String getDataSourceDescription() override { return url; } + String getDataSourceDescription() const override { return url; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 50d69129883..5e89a0a1b9d 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -37,8 +37,7 @@ ReadBufferIterator::ReadBufferIterator( SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const { - chassert(!object_info.getPath().starts_with("/")); - auto source = std::filesystem::path(configuration->getDataSourceDescription()) / object_info.getPath(); + auto source = StorageObjectStorageSource::getUniqueStoragePathIdentifier(*configuration, object_info); return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); } @@ -51,8 +50,7 @@ SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const std::back_inserter(sources), [&](const auto & elem) { - chassert(!elem->getPath().starts_with("/")); - return std::filesystem::path(configuration->getDataSourceDescription()) / elem->getPath(); + return StorageObjectStorageSource::getUniqueStoragePathIdentifier(*configuration, *elem); }); return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 00d569fea9f..6b6cde0c431 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -50,7 +50,7 @@ static const std::unordered_set optional_configuration_keys = "no_sign_request" }; -String StorageS3Configuration::getDataSourceDescription() +String StorageS3Configuration::getDataSourceDescription() const { return std::filesystem::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; } diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index de6c02d5020..906d10a1a9a 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -31,7 +31,7 @@ public: void setPaths(const Paths & paths) override { keys = paths; } String getNamespace() const override { return url.bucket; } - String getDataSourceDescription() override; + String getDataSourceDescription() const override; StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; bool isArchive() const override { return url.archive_pattern.has_value(); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 7b118cb7e6b..de75af5035b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -161,7 +161,7 @@ public: virtual const Paths & getPaths() const = 0; virtual void setPaths(const Paths & paths) = 0; - virtual String getDataSourceDescription() = 0; + virtual String getDataSourceDescription() const = 0; virtual String getNamespace() const = 0; virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; diff --git 
a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 7332574b246..b31d0f8a92e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -82,6 +82,21 @@ void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_ac setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); } +std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( + const Configuration & configuration, + const ObjectInfo & object_info, + bool include_connection_info) +{ + auto path = object_info.getPath(); + if (path.starts_with("/")) + path = path.substr(1); + + if (include_connection_info) + return fs::path(configuration.getDataSourceDescription()) / path; + else + return fs::path(configuration.getNamespace()) / path; +} + std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, @@ -183,7 +198,7 @@ Chunk StorageObjectStorageSource::generate() VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, - fs::path(configuration->getNamespace()) / reader.getObjectInfo().getPath(), + getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), object_info.metadata->size_bytes, &filename); return chunk; @@ -212,7 +227,7 @@ Chunk StorageObjectStorageSource::generate() void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / object_info.getPath(), + getUniqueStoragePathIdentifier(*configuration, object_info), configuration->format, format_settings, getContext()); @@ -222,7 +237,7 @@ void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_inf std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / object_info.getPath(), + getUniqueStoragePathIdentifier(*configuration, object_info), configuration->format, format_settings, getContext()); @@ -511,7 +526,7 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne for (const auto & object_info : new_batch) { chassert(object_info); - paths.push_back(fs::path(configuration->getNamespace()) / object_info->getPath()); + paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); } VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index e9635ff4dce..fd7c7aa7102 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -17,6 +17,7 @@ class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { friend class StorageS3QueueSource; public: + using Configuration = StorageObjectStorage::Configuration; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; using ObjectInfo = StorageObjectStorage::ObjectInfo; using ObjectInfos = StorageObjectStorage::ObjectInfos; @@ -58,6 +59,11 @@ public: ObjectInfos * read_keys, std::function file_progress_callback = {}); + static std::string 
getUniqueStoragePathIdentifier( + const Configuration & configuration, + const ObjectInfo & object_info, + bool include_connection_info = true); + protected: const String name; ObjectStoragePtr object_storage; From 9911f13c77588e089832c05aebfe0aff5b8241cd Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 16:39:53 +0200 Subject: [PATCH 559/651] Update function return type for consistency --- .../en/sql-reference/functions/geo/geohash.md | 14 +- docs/en/sql-reference/functions/geo/h3.md | 276 +++++++----------- docs/en/sql-reference/functions/geo/s2.md | 42 ++- docs/en/sql-reference/functions/geo/svg.md | 4 +- .../functions/rounding-functions.md | 16 +- .../functions/string-search-functions.md | 6 +- .../sql-reference/functions/uuid-functions.md | 8 +- 7 files changed, 138 insertions(+), 228 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index ce16af44e90..80c55650b9c 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -74,11 +74,11 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi **Arguments** -- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. [Float](../../../sql-reference/data-types/float.md). +- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. [Float](../../../sql-reference/data-types/float.md). +- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. [Float](../../../sql-reference/data-types/float.md). +- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. [Float](../../../sql-reference/data-types/float.md). +- `precision` — Geohash precision. Range: `[1, 12]`. [UInt8](../../../sql-reference/data-types/int-uint.md). :::note All coordinate parameters must be of the same type: either `Float32` or `Float64`. @@ -86,11 +86,9 @@ All coordinate parameters must be of the same type: either `Float32` or `Float64 **Returned values** -- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. +- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). - `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values. -Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). - :::note Function throws an exception if resulting array is over 10’000’000 items long. ::: diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 29486c58e6a..7faff8288b3 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -26,14 +26,12 @@ h3IsValid(h3index) **Parameter** -- `h3index` — Hexagon index number. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — The number is a valid H3 index. -- 0 — The number is not a valid H3 index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The number is a valid H3 index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 0 — The number is not a valid H3 index. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -63,14 +61,12 @@ h3GetResolution(h3index) **Parameter** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Index resolution. Range: `[0, 15]`. -- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -100,11 +96,11 @@ h3EdgeAngle(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -134,11 +130,11 @@ h3EdgeLengthM(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -168,11 +164,11 @@ h3EdgeLengthKm(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -202,16 +198,14 @@ geoToH3(lon, lat, resolution) **Arguments** -- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). +- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). 
+- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Hexagon index number. -- 0 in case of error. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- 0 in case of error. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -275,12 +269,11 @@ h3ToGeoBoundary(h3Index) **Arguments** -- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of pairs '(lon, lat)'. -Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). **Example** @@ -311,14 +304,12 @@ h3kRing(h3index, k) **Arguments** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) +- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Radius. [integer](../../../sql-reference/data-types/int-uint.md) **Returned values** -- Array of H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -354,13 +345,11 @@ h3GetBaseCell(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Hexagon base cell number. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Hexagon base cell number. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -390,13 +379,11 @@ h3HexAreaM2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Area in square meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Area in square meters. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -426,13 +413,11 @@ h3HexAreaKm2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Area in square kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Area in square kilometers. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -462,15 +447,13 @@ h3IndexesAreNeighbors(index1, index2) **Arguments** -- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index1` — Hexagon index number. 
[UInt64](../../../sql-reference/data-types/int-uint.md). +- `index2` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Indexes are neighbours. -- `0` — Indexes are not neighbours. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Indexes are neighbours. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — Indexes are not neighbours. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -500,14 +483,12 @@ h3ToChildren(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of the child H3-indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of the child H3-indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -537,14 +518,12 @@ h3ToParent(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Parent H3 index. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Parent H3 index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -572,13 +551,11 @@ h3ToString(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- String representation of the H3 index. - -Type: [String](../../../sql-reference/data-types/string.md). +- String representation of the H3 index. [String](../../../sql-reference/data-types/string.md). **Example** @@ -608,11 +585,11 @@ stringToH3(index_str) **Parameter** -- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). +- `index_str` — String representation of the H3 index. [String](../../../sql-reference/data-types/string.md). **Returned value** -- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. Returns 0 on error. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -642,11 +619,11 @@ h3GetResolution(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). 
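+
+A simple round-trip sanity check (the coordinates are arbitrary illustrative values):
+
+```sql
+SELECT h3GetResolution(geoToH3(37.79506683, 55.71290588, 10));
+```
+
+Because the index is built at resolution 10, this should return `10`. A fuller example follows below.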
**Example** @@ -676,14 +653,12 @@ h3IsResClassIII(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Index has a resolution with Class III orientation. -- `0` — Index doesn't have a resolution with Class III orientation. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index has a resolution with Class III orientation. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — Index doesn't have a resolution with Class III orientation. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -713,14 +688,12 @@ h3IsPentagon(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Index represents a pentagonal cell. -- `0` — Index doesn't represent a pentagonal cell. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index represents a pentagonal cell. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — Index doesn't represent a pentagonal cell. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -750,13 +723,11 @@ h3GetFaces(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array containing icosahedron faces intersected by a given H3 index. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array containing icosahedron faces intersected by a given H3 index. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -786,13 +757,11 @@ h3CellAreaM2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Cell area in square meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square meters. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -822,13 +791,11 @@ h3CellAreaRads2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Cell area in square radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square radians. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -858,14 +825,12 @@ h3ToCenterChild(index, resolution) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. 
-
-Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- [H3](#h3index) index of the center child contained by the given [H3](#h3index) at the given resolution. [UInt64](../../../sql-reference/data-types/int-uint.md).

 **Example**

@@ -895,13 +860,11 @@ h3ExactEdgeLengthM(index)

 **Parameter**

-- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md).

 **Returned value**

-- Exact edge length in meters.
-
-Type: [Float64](../../../sql-reference/data-types/float.md).
+- Exact edge length in meters. [Float64](../../../sql-reference/data-types/float.md).

 **Example**

@@ -931,13 +894,11 @@ h3ExactEdgeLengthKm(index)

 **Parameter**

-- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md).

 **Returned value**

-- Exact edge length in kilometers.
-
-Type: [Float64](../../../sql-reference/data-types/float.md).
+- Exact edge length in kilometers. [Float64](../../../sql-reference/data-types/float.md).

 **Example**

@@ -967,13 +928,11 @@ h3ExactEdgeLengthRads(index)

 **Parameter**

-- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md).

 **Returned value**

-- Exact edge length in radians.
-
-Type: [Float64](../../../sql-reference/data-types/float.md).
+- Exact edge length in radians. [Float64](../../../sql-reference/data-types/float.md).

 **Example**

@@ -1003,13 +962,11 @@ h3NumHexagons(resolution)

 **Parameter**

-- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
+- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md).

 **Returned value**

-- Number of H3 indices.
-
-Type: [Int64](../../../sql-reference/data-types/int-uint.md).
+- Number of H3 indices. [Int64](../../../sql-reference/data-types/int-uint.md).

 **Example**

@@ -1039,14 +996,12 @@ h3PointDistM(lat1, lon1, lat2, lon2)

 **Arguments**

-- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
-- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
+- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md).
+- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md).

 **Returned values**

-- Haversine or great circle distance in meters.
-
-Type: [Float64](../../../sql-reference/data-types/float.md).
+- Haversine or great circle distance in meters. [Float64](../../../sql-reference/data-types/float.md).

 **Example**

@@ -1076,14 +1031,12 @@ h3PointDistKm(lat1, lon1, lat2, lon2)

 **Arguments**

-- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
-- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
+- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md).
+- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md).
**Returned values** -- Haversine or great circle distance in kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in kilometers. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -1113,14 +1066,12 @@ h3PointDistRads(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in radians. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -1150,9 +1101,7 @@ h3GetRes0Indexes() **Returned values** -- Array of all the resolution 0 H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all the resolution 0 H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1183,13 +1132,11 @@ h3GetPentagonIndexes(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Array of all pentagon H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all pentagon H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1219,14 +1166,12 @@ h3Line(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -Array of h3 indexes representing the line of indices between the two provided indices: - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing the line of indices between the two provided indices. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1256,14 +1201,12 @@ h3Distance(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../../sql-reference/data-types/int-uint.md). 
+- `end` — Hexagon index number that represents an ending point. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Number of grid cells. - -Type: [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of grid cells. [Int64](../../../sql-reference/data-types/int-uint.md). Returns a negative number if finding the distance fails. @@ -1297,14 +1240,12 @@ h3HexRing(index, k) **Parameter** -- `index` — Hexagon index number that represents the origin. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents the origin. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Distance. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1334,14 +1275,12 @@ h3GetUnidirectionalEdge(originIndex, destinationIndex) **Parameter** -- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `originIndex` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Unidirectional Edge Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Unidirectional Edge Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -1371,14 +1310,12 @@ h3UnidirectionalEdgeisValid(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- 1 — The H3 index is a valid unidirectional edge. -- 0 — The H3 index is not a valid unidirectional edge. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The H3 index is a valid unidirectional edge. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 0 — The H3 index is not a valid unidirectional edge. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -1408,13 +1345,11 @@ h3GetOriginIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Origin Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Origin Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -1444,13 +1379,11 @@ h3GetDestinationIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). 
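
The unidirectional-edge functions above compose naturally. A minimal sketch (the two cell indexes are sample values assumed to be neighboring H3 cells; substitute a real neighboring pair if needed):

```sql
WITH h3GetUnidirectionalEdge(599686042433355775, 599686043507097599) AS edge
SELECT
    h3GetOriginIndexFromUnidirectionalEdge(edge)      AS origin,
    h3GetDestinationIndexFromUnidirectionalEdge(edge) AS destination;
```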
**Returned value**
 
-- Destination Hexagon Index number.
-
-Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- Destination Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Example**
 
@@ -1480,7 +1413,7 @@ h3GetIndexesFromUnidirectionalEdge(edge)
 
 **Parameter**
 
-- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Returned value**
 
@@ -1519,13 +1452,11 @@ h3GetUnidirectionalEdgesFromHexagon(index)
 
 **Parameter**
 
-- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — Hexagon index number that represents the origin hexagon. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Returned value**
 
-Array of h3 indexes representing each unidirectional edge:
-
-Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
+Array of h3 indexes representing each unidirectional edge. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
 
 **Example**
 
@@ -1555,12 +1486,11 @@ h3GetUnidirectionalEdgeBoundary(index)
 
 **Parameter**
 
-- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Returned value**
 
-- Array of pairs '(lon, lat)'.
-  Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)).
+- Array of pairs '(lon, lat)'. [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)).
 
 **Example**
 
diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md
index f4702eff44b..424b547753d 100644
--- a/docs/en/sql-reference/functions/geo/s2.md
+++ b/docs/en/sql-reference/functions/geo/s2.md
@@ -26,9 +26,7 @@ geoToS2(lon, lat)
 
 **Returned values**
 
-- S2 point index.
-
-Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Example**
 
@@ -62,9 +60,9 @@ s2ToGeo(s2index)
 
 **Returned values**
 
-- A tuple consisting of two values: `tuple(lon,lat)`.
-
-Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
+- A [tuple](../../data-types/tuple.md) consisting of two values:
+  - `lon`. [Float64](../../../sql-reference/data-types/float.md).
+  - `lat`. [Float64](../../../sql-reference/data-types/float.md).
 
 **Example**
 
@@ -98,9 +96,7 @@ s2GetNeighbors(s2index)
 
 **Returned values**
 
-- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`.
-
-Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
 
 **Example**
 
@@ -134,10 +130,8 @@ s2CellsIntersect(s2index1, s2index2)
 
 **Returned values**
 
-- 1 — If the cells intersect.
-- 0 — If the cells don't intersect.
-
-Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
+- 1 — If the cells intersect. [UInt8](../../../sql-reference/data-types/int-uint.md).
+- 0 — If the cells don't intersect. [UInt8](../../../sql-reference/data-types/int-uint.md).
 
 **Example**
 
@@ -173,10 +167,8 @@ s2CapContains(center, degrees, point)
 
 **Returned values**
 
-- 1 — If the cap contains the S2 point index.
-- 0 — If the cap doesn't contain the S2 point index.
-
-Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
+- 1 — If the cap contains the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md).
+- 0 — If the cap doesn't contain the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md).
 
 **Example**
 
@@ -211,8 +203,8 @@ s2CapUnion(center1, radius1, center2, radius2)
 
 **Returned values**
 
-- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md).
+- `center` — S2 point index corresponding to the center of the smallest cap containing the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `radius` — Radius of the smallest cap containing the two input caps. [Float64](../../../sql-reference/data-types/float.md).
 
 **Example**
 
@@ -248,8 +240,8 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point)
 
 **Returned values**
 
-- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
+- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Example**
 
@@ -321,8 +313,8 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
 
 **Returned values**
 
-- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
 **Example**
 
@@ -357,8 +349,8 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin
 
 **Returned values**
 
-- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/geo/svg.md b/docs/en/sql-reference/functions/geo/svg.md index c565d1f9de7..320d4542fee 100644 --- a/docs/en/sql-reference/functions/geo/svg.md +++ b/docs/en/sql-reference/functions/geo/svg.md @@ -23,13 +23,11 @@ Aliases: `SVG`, `svg` **Returned value** -- The SVG representation of the geometry: +- The SVG representation of the geometry. [String](../../data-types/string). - SVG circle - SVG polygon - SVG path -Type: [String](../../data-types/string) - **Examples** **Circle** diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 6cbcc4e4ef3..20f73de4410 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -328,15 +328,13 @@ roundAge(num) **Returned value** -- Returns `0`, for $age \lt 1$. -- Returns `17`, for $1 \leq age \leq 17$. -- Returns `18`, for $18 \leq age \leq 24$. -- Returns `25`, for $25 \leq age \leq 34$. -- Returns `35`, for $35 \leq age \leq 44$. -- Returns `45`, for $45 \leq age \leq 54$. -- Returns `55`, for $age \geq 55$. - -Type: [UInt8](../data-types/int-uint.md) in all cases. +- Returns `0`, for $age \lt 1$. [UInt8](../data-types/int-uint.md). +- Returns `17`, for $1 \leq age \leq 17$. [UInt8](../data-types/int-uint.md). +- Returns `18`, for $18 \leq age \leq 24$. [UInt8](../data-types/int-uint.md). +- Returns `25`, for $25 \leq age \leq 34$. [UInt8](../data-types/int-uint.md). +- Returns `35`, for $35 \leq age \leq 44$. [UInt8](../data-types/int-uint.md). +- Returns `45`, for $45 \leq age \leq 54$. [UInt8](../data-types/int-uint.md). +- Returns `55`, for $age \geq 55$. [UInt8](../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 327eb8994db..f02c8f15aa9 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -42,8 +42,8 @@ Alias: **Returned values** -- Starting position in bytes and counting from 1, if the substring was found. -- 0, if the substring was not found. +- Starting position in bytes and counting from 1, if the substring was found. [UInt64](../../sql-reference/data-types/int-uint.md). +- 0, if the substring was not found. [UInt64](../../sql-reference/data-types/int-uint.md). If substring `needle` is empty, these rules apply: - if no `start_pos` was specified: return `1` @@ -53,8 +53,6 @@ If substring `needle` is empty, these rules apply: The same rules also apply to functions `locate`, `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`. -Type: `Integer`. - **Examples** Query: diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 0c1da88913d..a16663afc5b 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -640,9 +640,7 @@ UUIDv7ToDateTime(uuid[, timezone]) **Returned value** -- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. - -Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. 
 If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md).
 
 **Usage examples**
 
@@ -682,9 +680,7 @@ serverUUID()
 
 **Returned value**
 
-- The UUID of the server.
-
-Type: [UUID](../data-types/uuid.md).
+- The UUID of the server. [UUID](../data-types/uuid.md).
 
 ## See also
 
From 45e4e30cfd13f35bda29629d42f881c69bbf5250 Mon Sep 17 00:00:00 2001
From: Blargian Date: Thu, 23 May 2024 16:51:17 +0200
Subject: [PATCH 560/651] Update return type of logical functions

---
 .../functions/logical-functions.md | 24 +++++++------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md
index 138b804a575..1977c5c2a7e 100644
--- a/docs/en/sql-reference/functions/logical-functions.md
+++ b/docs/en/sql-reference/functions/logical-functions.md
@@ -30,11 +30,9 @@ Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-ope
 
 **Returned value**
 
-- `0`, if at least one argument evaluates to `false`,
-- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`,
-- `1`, otherwise.
-
-Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
+- `0`, if at least one argument evaluates to `false`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
+- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`. [NULL](../../sql-reference/syntax.md/#null).
+- `1`, otherwise. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
 
 **Example**
 
@@ -136,11 +134,9 @@ Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-ne
 
 **Returned value**
 
-- `1`, if `val` evaluates to `false`,
-- `0`, if `val` evaluates to `true`,
-- `NULL`, if `val` is `NULL`.
-
-Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
+- `1`, if `val` evaluates to `false`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
+- `0`, if `val` evaluates to `true`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
+- `NULL`, if `val` is `NULL`. [NULL](../../sql-reference/syntax.md/#null).
 
 **Example**
 
@@ -172,11 +168,9 @@ xor(val1, val2...)
 
 **Returned value**
 
-- `1`, for two values: if one of the values evaluates to `false` and other does not,
-- `0`, for two values: if both values evaluate to `false` or to both `true`,
-- `NULL`, if at least one of the inputs is `NULL`
-
-Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
+- `1`, for two values: if one of the values evaluates to `false` and the other does not. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
+- `0`, for two values: if both values evaluate to `false` or both to `true`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
+- `NULL`, if at least one of the inputs is `NULL`. [NULL](../../sql-reference/syntax.md/#null).
 
 **Example**
 
From 60e94af1ecd1e2b3e5b3f3194901d001653b7991 Mon Sep 17 00:00:00 2001
From: Dmitry Novik Date: Thu, 23 May 2024 16:55:02 +0200
Subject: [PATCH 561/651] Return one line change

---
 src/Planner/PlannerExpressionAnalysis.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp
index 6e194b2c03e..7984d97a1ea 100644
--- a/src/Planner/PlannerExpressionAnalysis.cpp
+++ b/src/Planner/PlannerExpressionAnalysis.cpp
@@ -444,7 +444,6 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node,
     for (auto & interpolate_node : interpolate_list_node.getNodes())
     {
         auto & interpolate_node_typed = interpolate_node->as();
-        interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression());
         interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression());
     }
 
From a4903e6b5583b172496be8fa0dbf6cead2b51d86 Mon Sep 17 00:00:00 2001
From: kssenii Date: Thu, 23 May 2024 16:55:48 +0200
Subject: [PATCH 562/651] Add supportsDynamicSubcolumns()

---
 src/Storages/ObjectStorage/StorageObjectStorage.h | 2 ++
 src/Storages/ObjectStorage/StorageObjectStorageCluster.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h
index de75af5035b..f45d8c1f01a 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.h
@@ -84,6 +84,8 @@ public:
 
     bool supportsSubcolumns() const override { return true; }
 
+    bool supportsDynamicSubcolumns() const override { return true; }
+
     bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; }
 
     bool supportsSubsetOfColumns(const ContextPtr & context) const;
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
index 1c244b1ca36..69fec2b3c77 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h
@@ -26,6 +26,8 @@ public:
 
     bool supportsSubcolumns() const override { return true; }
 
+    bool supportsDynamicSubcolumns() const override { return true; }
+
     bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; }
 
     RemoteQueryExecutor::Extension getTaskIteratorExtension(
From 9481f2f32535630694b9c328384b69116f3b535b Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Thu, 23 May 2024 17:07:55 +0200
Subject: [PATCH 563/651] Update array-functions.md

Add missing ::: for note
---
 docs/en/sql-reference/functions/array-functions.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 512874d20b7..458adb276fd 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -2373,6 +2373,7 @@ arrayMin([func,] arr)
 
 :::note
 If `func` is specified, then the return type matches the return value type of `func`,
 otherwise it matches the type of the array elements.
+:::
 
 **Examples**
 
From 9cfd2322d717fc6d2208683b224ee6969932de79 Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Thu, 23 May 2024 17:14:56 +0200
Subject: [PATCH 564/651] Small edits to bit-functions.md

---
 docs/en/sql-reference/functions/bit-functions.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md
index 709f438d67f..2538ad32022 100644
--- a/docs/en/sql-reference/functions/bit-functions.md
+++ b/docs/en/sql-reference/functions/bit-functions.md
@@ -186,9 +186,9 @@ SELECT bitTest(number, index)
 
 - `number` – Integer number.
 - `index` – Position of bit.
 
-**Returned values**
+**Returned value**
 
-Returns a value of bit at specified position. [UInt8](../data-types/int-uint.md).
+- Value of the bit at the specified position. [UInt8](../data-types/int-uint.md).
 
 **Example**
 
@@ -249,9 +249,9 @@ SELECT bitTestAll(number, index1, index2, index3, index4, ...)
 
 - `number` – Integer number.
 - `index1`, `index2`, `index3`, `index4` – Positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`).
 
-**Returned values**
+**Returned value**
 
-Returns result of logical conjuction. [UInt8](../data-types/int-uint.md).
+- Result of the logical conjunction. [UInt8](../data-types/int-uint.md).
 
 **Example**
 
@@ -312,9 +312,9 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...)
 
 - `number` – Integer number.
 - `index1`, `index2`, `index3`, `index4` – Positions of bit.
 
-**Returned values**
+**Returned value**
 
-Returns result of logical disjunction. [UInt8](../data-types/int-uint.md).
+- Result of the logical disjunction. [UInt8](../data-types/int-uint.md).
 
 **Example**
 
From a01b6e8e8278b531a72463eb6f1920fe8d682c0e Mon Sep 17 00:00:00 2001
From: Shaun Struwig <41984034+Blargian@users.noreply.github.com>
Date: Thu, 23 May 2024 17:19:03 +0200
Subject: [PATCH 565/651] Numbers in return type should be in ``

---
 docs/en/sql-reference/functions/geo/s2.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md
index 424b547753d..2158ef2d57d 100644
--- a/docs/en/sql-reference/functions/geo/s2.md
+++ b/docs/en/sql-reference/functions/geo/s2.md
@@ -94,7 +94,7 @@ s2GetNeighbors(s2index)
 
 - `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
-**Returned values**
+**Returned value**
 
 - An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
 
@@ -128,10 +128,10 @@ s2CellsIntersect(s2index1, s2index2)
 
 - `siIndex1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
-**Returned values**
+**Returned value**
 
-- 1 — If the cells intersect. [UInt8](../../../sql-reference/data-types/int-uint.md).
-- 0 — If the cells don't intersect. [UInt8](../../../sql-reference/data-types/int-uint.md).
+- `1` — If the cells intersect. [UInt8](../../../sql-reference/data-types/int-uint.md).
+- `0` — If the cells don't intersect. [UInt8](../../../sql-reference/data-types/int-uint.md).
 
 **Example**
 
@@ -165,10 +165,10 @@ s2CapContains(center, degrees, point)
 
 - `degrees` — Radius of the cap in degrees.
[Float64](../../../sql-reference/data-types/float.md). - `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the cap contains the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). -- 0 — If the cap doesn't contain the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cap contains the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — If the cap doesn't contain the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -275,10 +275,10 @@ s2RectContains(s2PointLow, s2PointHi, s2Point) - `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). - `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the rectangle contains the given S2 point. -- 0 — If the rectangle doesn't contain the given S2 point. +- `1` — If the rectangle contains the given S2 point. +- `0` — If the rectangle doesn't contain the given S2 point. **Example** From 732b6d1ecc5df7360e0290e950904b7512711777 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 17:22:02 +0200 Subject: [PATCH 566/651] Add hyphens to return values --- .../functions/splitting-merging-functions.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 77563713605..8aa171949a3 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -25,7 +25,7 @@ splitByChar(separator, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Empty substrings may be selected when: @@ -78,7 +78,7 @@ splitByString(separator, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Empty substrings may be selected when: @@ -135,7 +135,7 @@ splitByRegexp(regexp, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Empty substrings may be selected when: @@ -192,7 +192,7 @@ splitByWhitespace(s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). 
:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -231,7 +231,7 @@ splitByNonAlpha(s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -293,7 +293,7 @@ Alias: `splitByAlpha` **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. From bab94ac56aa0ef568d34dd1e230e29190e8eaec9 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 17:24:07 +0200 Subject: [PATCH 567/651] Correct "note:::" to ":::note" --- docs/en/sql-reference/functions/hash-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 89b95888f85..e3968a691a8 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -844,7 +844,7 @@ SELECT xxHash64('') - Hash value. [UInt32/64](../data-types/int-uint.md). -note::: +:::note The return type will be `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. ::: From c1950236ced0b110e679c4042d1fab2c7df26f2f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 15:24:18 +0000 Subject: [PATCH 568/651] Cosmetics, pt. IV --- src/Functions/{serial.cpp => generateSerialID.cpp} | 2 -- 1 file changed, 2 deletions(-) rename src/Functions/{serial.cpp => generateSerialID.cpp} (98%) diff --git a/src/Functions/serial.cpp b/src/Functions/generateSerialID.cpp similarity index 98% rename from src/Functions/serial.cpp rename to src/Functions/generateSerialID.cpp index d65df83c9f9..db26d0d684b 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/generateSerialID.cpp @@ -12,8 +12,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int KEEPER_EXCEPTION; } From e6f135089f300a6e5cc0d1276e748750f2b59454 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 15:25:38 +0000 Subject: [PATCH 569/651] Cosmetics, pt. 
V --- src/Functions/generateSnowflakeID.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 1b26bf44adb..bbae41e4f49 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -11,11 +11,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { @@ -81,7 +76,7 @@ SnowflakeComponents toComponents(uint64_t snowflake) { uint64_t toSnowflakeID(SnowflakeComponents components) { return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) | - components.machind_id << (machine_seq_num_bits_count) | + components.machind_id << (machine_seq_num_bits_count) | components.machine_seq_num); } @@ -120,7 +115,7 @@ RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1); else end.timestamp = begin.timestamp; - + end.machind_id = begin.machind_id; end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask; From 4611a44c1f76873482fff498f7e7f8414f24e375 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 15:53:14 +0000 Subject: [PATCH 570/651] Cosmetics, pt. VI --- src/Functions/generateSnowflakeID.cpp | 100 +++++++++++++------------- src/Functions/generateUUIDv7.cpp | 25 ++++--- 2 files changed, 60 insertions(+), 65 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index bbae41e4f49..4e61bd9fb1c 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -27,7 +27,7 @@ namespace - The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) - The middle 10 bits are the machine ID -- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes +- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes */ /// bit counts @@ -36,14 +36,13 @@ constexpr auto machine_id_bits_count = 10; constexpr auto machine_seq_num_bits_count = 12; /// bits masks for Snowflake ID components -// constexpr uint64_t timestamp_mask = ((1ULL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); // unused -constexpr uint64_t machine_id_mask = ((1ULL << machine_id_bits_count) - 1) << machine_seq_num_bits_count; -constexpr uint64_t machine_seq_num_mask = (1ULL << machine_seq_num_bits_count) - 1; +constexpr uint64_t machine_id_mask = ((1ull << machine_id_bits_count) - 1) << machine_seq_num_bits_count; +constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - 1; /// max values constexpr uint64_t max_machine_seq_num = machine_seq_num_mask; -uint64_t getMachineID() +uint64_t getMachineId() { UUID server_uuid = ServerUUID::get(); /// hash into 64 bits @@ -57,48 +56,44 @@ uint64_t getTimestamp() { auto now = std::chrono::system_clock::now(); auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); - return static_cast(ticks_since_epoch) & ((1ULL << timestamp_bits_count) - 1); + return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); } -struct SnowflakeComponents { +struct SnowflakeId +{ uint64_t timestamp; uint64_t machind_id; uint64_t machine_seq_num; }; 
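/// Worked example (a sketch based on the constants above): a generated ID decomposes as
///   timestamp       = id >> (machine_id_bits_count + machine_seq_num_bits_count), i.e. id >> 22
///   machine id      = (id & machine_id_mask) >> machine_seq_num_bits_count
///   machine seq num = id & machine_seq_num_mask
/// Note: 41 timestamp bits of milliseconds cover roughly 69 years (2^41 ms) from the Unix epoch.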
-SnowflakeComponents toComponents(uint64_t snowflake) { - return { - .timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)), - .machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), - .machine_seq_num = (snowflake & machine_seq_num_mask) - }; +SnowflakeId toSnowflakeId(uint64_t snowflake) +{ + return {.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)), + .machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), + .machine_seq_num = (snowflake & machine_seq_num_mask)}; } -uint64_t toSnowflakeID(SnowflakeComponents components) { +uint64_t fromSnowflakeId(SnowflakeId components) +{ return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) | components.machind_id << (machine_seq_num_bits_count) | components.machine_seq_num); } -struct RangeOfSnowflakeIDs { - /// [begin, end) - SnowflakeComponents begin, end; +struct SnowflakeIdRange +{ + SnowflakeId begin; /// inclusive + SnowflakeId end; /// exclusive }; -/* Get range of `input_rows_count` Snowflake IDs from `max(available, now)` - -1. Calculate Snowflake ID by current timestamp (`now`) -2. `begin = max(available, now)` -3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow -*/ -RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, size_t input_rows_count) +/// To get the range of `input_rows_count` Snowflake IDs from `max(available, now)`: +/// 1. calculate Snowflake ID by current timestamp (`now`) +/// 2. `begin = max(available, now)` +/// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow +SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count) { /// 1. `now` - SnowflakeComponents begin = { - .timestamp = getTimestamp(), - .machind_id = getMachineID(), - .machine_seq_num = 0 - }; + SnowflakeId begin = {.timestamp = getTimestamp(), .machind_id = getMachineId(), .machine_seq_num = 0}; /// 2. `begin` if (begin.timestamp <= available.timestamp) @@ -108,7 +103,7 @@ RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, } /// 3. `end = begin + input_rows_count` - SnowflakeComponents end; + SnowflakeId end; const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1); if (input_rows_count >= seq_nums_in_current_timestamp_left) /// if sequence numbers in current timestamp is not enough for rows => update timestamp @@ -125,22 +120,22 @@ RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, struct GlobalCounterPolicy { static constexpr auto name = "generateSnowflakeID"; - static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + static constexpr auto description = R"(Generates a Snowflake ID. 
The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously. struct Data { static inline std::atomic lowest_available_snowflake_id = 0; - SnowflakeComponents reserveRange(size_t input_rows_count) + SnowflakeId reserveRange(size_t input_rows_count) { uint64_t available_snowflake_id = lowest_available_snowflake_id.load(); - RangeOfSnowflakeIDs range; + SnowflakeIdRange range; do { - range = getRangeOfAvailableIDs(toComponents(available_snowflake_id), input_rows_count); + range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count); } - while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, toSnowflakeID(range.end))); + while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); /// if `compare_exhange` failed => another thread updated `lowest_available_snowflake_id` and we should try again /// completed => range of IDs [begin, end) is reserved, can return the beginning of the range @@ -152,17 +147,17 @@ struct GlobalCounterPolicy struct ThreadLocalCounterPolicy { static constexpr auto name = "generateSnowflakeIDThreadMonotonic"; - static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)"; + static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)"; /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. 
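    /// (A thread_local counter needs no compare-exchange loop, at the cost of cross-thread monotonicity.)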
struct Data { static inline thread_local uint64_t lowest_available_snowflake_id = 0; - SnowflakeComponents reserveRange(size_t input_rows_count) + SnowflakeId reserveRange(size_t input_rows_count) { - RangeOfSnowflakeIDs range = getRangeOfAvailableIDs(toComponents(lowest_available_snowflake_id), input_rows_count); - lowest_available_snowflake_id = toSnowflakeID(range.end); + SnowflakeIdRange range = getRangeOfAvailableIds(toSnowflakeId(lowest_available_snowflake_id), input_rows_count); + lowest_available_snowflake_id = fromSnowflakeId(range.end); return range.begin; } }; @@ -188,7 +183,7 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "Arbitrary expression"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -200,17 +195,18 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - vec_to.resize(input_rows_count); - if (input_rows_count != 0) { + vec_to.resize(input_rows_count); + typename FillPolicy::Data data; + /// get the begin of available snowflake ids range - SnowflakeComponents snowflake_id = data.reserveRange(input_rows_count); + SnowflakeId snowflake_id = data.reserveRange(input_rows_count); for (UInt64 & to_row : vec_to) { - to_row = toSnowflakeID(snowflake_id); + to_row = fromSnowflakeId(snowflake_id); if (snowflake_id.machine_seq_num++ == max_machine_seq_num) { snowflake_id.machine_seq_num = 0; @@ -225,20 +221,20 @@ public: }; template -void registerSnowflakeIDGenerator(auto& factory) +void registerSnowflakeIDGenerator(auto & factory) { static constexpr auto doc_syntax_format = "{}([expression])"; static constexpr auto example_format = "SELECT {}()"; static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; - FunctionDocumentation::Description doc_description = FillPolicy::doc_description; - FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); - FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; - FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UInt64"; - FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; - FunctionDocumentation::Categories doc_categories = {"Snowflake ID"}; + FunctionDocumentation::Description description = FillPolicy::description; + FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; + FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; - factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); + factory.template registerFunction>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive); } REGISTER_FUNCTION(GenerateSnowflakeID) diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index 411a3a076ac..f2a82431c0a 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -76,7 +76,7 @@ void setVariant(UUID & uuid) struct FillAllRandomPolicy { static constexpr auto name = "generateUUIDv7NonMonotonic"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; struct Data { void generate(UUID & uuid, uint64_t ts) @@ -136,7 +136,7 @@ struct CounterFields struct GlobalCounterPolicy { static constexpr auto name = "generateUUIDv7"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. 
Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; /// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously. struct Data @@ -159,7 +159,7 @@ struct GlobalCounterPolicy struct ThreadLocalCounterPolicy { static constexpr auto name = "generateUUIDv7ThreadMonotonic"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. 
struct Data @@ -186,7 +186,6 @@ class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy { public: String getName() const final { return FillPolicy::name; } - size_t getNumberOfArguments() const final { return 0; } bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const final { return false; } @@ -198,7 +197,7 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "Arbitrary expression"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -264,20 +263,20 @@ private: }; template -void registerUUIDv7Generator(auto& factory) +void registerUUIDv7Generator(auto & factory) { static constexpr auto doc_syntax_format = "{}([expression])"; static constexpr auto example_format = "SELECT {}()"; static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; - FunctionDocumentation::Description doc_description = FillPolicy::doc_description; - FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); - FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; - FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7."; - FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; - FunctionDocumentation::Categories doc_categories = {"UUID"}; + FunctionDocumentation::Description description = FillPolicy::description; + FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue returned_value = "A value of type UUID version 7."; + FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories categories = {"UUID"}; - factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); + factory.template registerFunction>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive); } REGISTER_FUNCTION(GenerateUUIDv7) From 91c1456141f2783234d1a7fd6a749e9e0493c46e Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Wed, 22 May 2024 22:11:46 +0300 Subject: [PATCH 571/651] CNF with mutually exclusive atoms reduction fix --- src/Analyzer/Passes/ConvertQueryToCNFPass.cpp | 20 +++++- src/Interpreters/TreeCNFConverter.h | 21 +++++- .../WhereConstraintsOptimizer.cpp | 19 ++++- .../0_stateless/03161_cnf_reduction.reference | 23 ++++++ .../0_stateless/03161_cnf_reduction.sql | 72 +++++++++++++++++++ 5 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03161_cnf_reduction.reference create mode 100644 tests/queries/0_stateless/03161_cnf_reduction.sql diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 96bc62212fd..5951e8fc5ea 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -99,6 +99,23 @@ bool checkIfGroupAlwaysTrueGraph(const Analyzer::CNF::OrGroup & group, const Com return false; } +bool checkIfGroupAlwaysTrueAtoms(const Analyzer::CNF::OrGroup & group) +{ + /// Filters out groups containing mutually exclusive atoms, + /// since these groups are always True + + for (const auto & atom : group) + { + auto negated(atom); + negated.negative = !atom.negative; + if (group.contains(negated)) + { + return true; + } + } + return false; +} + bool checkIfAtomAlwaysFalseFullMatch(const Analyzer::CNF::AtomicFormula & atom, const ConstraintsDescription::QueryTreeData & query_tree_constraints) { const auto constraint_atom_ids = query_tree_constraints.getAtomIds(atom.node_with_hash); @@ -644,7 +661,8 @@ void optimizeWithConstraints(Analyzer::CNF & cnf, const QueryTreeNodes & table_e cnf.filterAlwaysTrueGroups([&](const auto & group) { /// remove always true groups from CNF - return !checkIfGroupAlwaysTrueFullMatch(group, query_tree_constraints) && !checkIfGroupAlwaysTrueGraph(group, compare_graph); + return !checkIfGroupAlwaysTrueFullMatch(group, query_tree_constraints) + && !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group); }) .filterAlwaysFalseAtoms([&](const Analyzer::CNF::AtomicFormula & atom) { diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index 8258412f1a6..ae1551cd9c2 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -164,6 +164,12 @@ public: void pushNotIn(CNFQuery::AtomicFormula & atom); +/// Reduces CNF groups by removing mutually exclusive atoms +/// found across groups, in case other atoms are identical. +/// Might require multiple passes to complete reduction. 
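+/// (A single pass can merge two groups into a shorter one, which may in turn pair with another group.)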
+/// +/// Example: +/// (x OR y) AND (x OR !y) -> x template TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) { @@ -175,10 +181,19 @@ TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) bool inserted = false; for (const auto & atom : group) { - copy.erase(atom); using AtomType = std::decay_t; AtomType negative_atom(atom); negative_atom.negative = !atom.negative; + + // Sikpping erase-insert for mutually exclusive atoms within + // signle group, since it won't insert negative atom, which + // will break the logic of this rule + if (copy.contains(negative_atom)) + { + continue; + } + + copy.erase(atom); copy.insert(negative_atom); if (groups.contains(copy)) @@ -209,6 +224,10 @@ bool isCNFGroupSubset(const TOrGroup & left, const TOrGroup & right) return true; } +/// Removes CNF groups if subset group is found in CNF. +/// +/// Example: +/// (x OR y) AND (x) -> x template TAndGroup filterCNFSubsets(const TAndGroup & groups) { diff --git a/src/Interpreters/WhereConstraintsOptimizer.cpp b/src/Interpreters/WhereConstraintsOptimizer.cpp index 979a4f4dbf5..456cf76b987 100644 --- a/src/Interpreters/WhereConstraintsOptimizer.cpp +++ b/src/Interpreters/WhereConstraintsOptimizer.cpp @@ -91,6 +91,22 @@ bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const Comparis return false; } +bool checkIfGroupAlwaysTrueAtoms(const CNFQuery::OrGroup & group) +{ + /// Filters out groups containing mutually exclusive atoms, + /// since these groups are always True + + for (const auto & atom : group) + { + auto negated(atom); + negated.negative = !atom.negative; + if (group.contains(negated)) + { + return true; + } + } + return false; +} bool checkIfAtomAlwaysFalseFullMatch(const CNFQuery::AtomicFormula & atom, const ConstraintsDescription & constraints_description) { @@ -158,7 +174,8 @@ void WhereConstraintsOptimizer::perform() .filterAlwaysTrueGroups([&compare_graph, this](const auto & group) { /// remove always true groups from CNF - return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints()) && !checkIfGroupAlwaysTrueGraph(group, compare_graph); + return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints()) + && !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group); }) .filterAlwaysFalseAtoms([&compare_graph, this](const auto & atom) { diff --git a/tests/queries/0_stateless/03161_cnf_reduction.reference b/tests/queries/0_stateless/03161_cnf_reduction.reference new file mode 100644 index 00000000000..5e39c0f3223 --- /dev/null +++ b/tests/queries/0_stateless/03161_cnf_reduction.reference @@ -0,0 +1,23 @@ +-- Expected plan with analyzer: +SELECT id +FROM `03161_table` +WHERE f +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1 + +-- Expected result with analyzer: +1 + +-- Expected plan w/o analyzer: +SELECT id +FROM `03161_table` +WHERE f +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0 + +-- Expected result w/o analyzer: +1 + +-- Reproducer from the issue with analyzer +2 + +-- Reproducer from the issue w/o analyzer +2 diff --git a/tests/queries/0_stateless/03161_cnf_reduction.sql b/tests/queries/0_stateless/03161_cnf_reduction.sql new file mode 100644 index 00000000000..b34e9171d45 --- /dev/null +++ b/tests/queries/0_stateless/03161_cnf_reduction.sql @@ -0,0 +1,72 @@ +DROP TABLE IF EXISTS 03161_table; + +CREATE TABLE 03161_table (id UInt32, f UInt8) ENGINE = Memory; + +INSERT INTO 03161_table VALUES 
+INSERT INTO 03161_table VALUES (0, 0), (1, 1), (2, 0);
+
+SELECT '-- Expected plan with analyzer:';
+
+EXPLAIN SYNTAX
+SELECT id
+FROM 03161_table
+WHERE f AND (NOT(f) OR f)
+SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1;
+
+SELECT '';
+
+SELECT '-- Expected result with analyzer:';
+
+SELECT id
+FROM 03161_table
+WHERE f AND (NOT(f) OR f)
+SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1;
+
+SELECT '';
+
+SELECT '-- Expected plan w/o analyzer:';
+
+EXPLAIN SYNTAX
+SELECT id
+FROM 03161_table
+WHERE f AND (NOT(f) OR f)
+SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0;
+
+SELECT '';
+
+SELECT '-- Expected result w/o analyzer:';
+
+SELECT id
+FROM 03161_table
+WHERE f AND (NOT(f) OR f)
+SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0;
+
+DROP TABLE IF EXISTS 03161_table;
+
+-- Checking reproducer from GitHub issue
+-- https://github.com/ClickHouse/ClickHouse/issues/57400
+
+DROP TABLE IF EXISTS 03161_reproducer;
+
+CREATE TABLE 03161_reproducer (c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8) ENGINE = Memory;
+
+INSERT INTO 03161_reproducer VALUES (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (0, 0, 0, 0, 0, 0, 0, 0, 1, 1), (0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (0, 0, 0, 0, 0, 0, 0, 1, 0, 1), (0, 0, 0, 0, 0, 0, 0, 1, 1, 0), (0, 0, 0, 0, 0, 0, 0, 1, 1, 1);
+
+SELECT '';
+
+SELECT '-- Reproducer from the issue with analyzer';
+
+SELECT count()
+FROM 03161_reproducer
+WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7))
+SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1;
+
+SELECT '';
+
+SELECT '-- Reproducer from the issue w/o analyzer';
+
+SELECT count()
+FROM 03161_reproducer
+WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7))
+SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0;
+
+DROP TABLE IF EXISTS 03161_reproducer;

From c7aa283b7a418f6372e67b386342815629e26f39 Mon Sep 17 00:00:00 2001
From: Eduard Karacharov <13005055+korowa@users.noreply.github.com>
Date: Thu, 23 May 2024 14:20:15 +0300
Subject: [PATCH 572/651] Update src/Interpreters/TreeCNFConverter.h

Co-authored-by: Antonio Andelic
---
 src/Interpreters/TreeCNFConverter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h
index ae1551cd9c2..ec4b029eee9 100644
--- a/src/Interpreters/TreeCNFConverter.h
+++ b/src/Interpreters/TreeCNFConverter.h
@@ -186,7 +186,7 @@ TAndGroup reduceOnceCNFStatements(const TAndGroup & groups)
             negative_atom.negative = !atom.negative;
 
             // Skipping erase-insert for mutually exclusive atoms within
-            // signle group, since it won't insert negative atom, which
+            // single group, since it won't insert negative atom, which
             // will break the logic of this rule
             if (copy.contains(negative_atom))
             {

From 2315991504b1e95d7bb2594e54e3c6f749897d79 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov
Date: Thu, 23 May 2024 18:41:14 +0200
Subject: [PATCH 573/651] Build fix

---
 src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff 
--git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index df8fb6f6656..fb0f0ba9154 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -67,12 +67,11 @@ void MergeTreeDataPartWriterCompact::initDynamicStreamsIfNeeded(const Block & bl return; is_dynamic_streams_initialized = true; - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { if (column.type->hasDynamicSubcolumns()) { - auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + auto compression = getCodecDescOrDefault(column.name, default_codec); addStreams(column, block.getByName(column.name).column, compression); } } From 8d697123dac574e727101d241e4d16eae2bce8da Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 23 May 2024 16:36:24 +0200 Subject: [PATCH 574/651] CI: Cancel sync wf on new push --- .github/workflows/pull_request.yml | 3 +++ tests/ci/ci.py | 37 +++++++++++++++++++-------- tests/ci/ci_metadata.py | 41 +++++++++++++++++++++++++++--- tests/ci/env_helper.py | 1 + 4 files changed, 68 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index f20e987db97..48b4a558580 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -33,6 +33,9 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 + - name: Cancel Sync PR workflow + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 99555b06bbf..68db08fbe96 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1908,13 +1908,26 @@ def _get_ext_check_name(check_name: str) -> str: return check_name_with_group -def _cancel_pr_wf(s3: S3Helper, pr_number: int) -> None: - run_id = CiMetadata(s3, pr_number).fetch_meta().run_id - if not run_id: - print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") +def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> None: + wf_data = CiMetadata(s3, pr_number).fetch_meta() + if not cancel_sync: + if not wf_data.run_id: + print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") + else: + print( + f"Canceling PR workflow run_id: [{wf_data.run_id}], pr: [{pr_number}]" + ) + GitHub.cancel_wf(GITHUB_REPOSITORY, get_best_robot_token(), wf_data.run_id) else: - print(f"Canceling PR workflow run_id: [{run_id}], pr: [{pr_number}]") - GitHub.cancel_wf(GITHUB_REPOSITORY, get_best_robot_token(), run_id) + if not wf_data.sync_pr_run_id: + print("WARNING: Sync PR run id has not been found") + else: + print(f"Canceling sync PR workflow run_id: [{wf_data.sync_pr_run_id}]") + GitHub.cancel_wf( + "ClickHouse/clickhouse-private", + get_best_robot_token(), + wf_data.sync_pr_run_id, + ) def main() -> int: @@ -1947,7 +1960,7 @@ def main() -> int: if args.configure: if CI and pr_info.is_pr: # store meta on s3 (now we need it only for PRs) - meta = CiMetadata(s3, pr_info.number) + meta = CiMetadata(s3, pr_info.number, pr_info.head_ref) meta.run_id = int(GITHUB_RUN_ID) meta.push_meta() @@ -2245,10 +2258,12 @@ def main() -> int: ### CANCEL PREVIOUS WORKFLOW RUN elif args.cancel_previous_run: - assert ( - pr_info.is_merge_queue - ), "Currently it's supposed to be used in MQ wf to cancel running PR 
wf if any" - _cancel_pr_wf(s3, pr_info.merged_pr) + if pr_info.is_merge_queue: + _cancel_pr_wf(s3, pr_info.merged_pr) + elif pr_info.is_pr: + _cancel_pr_wf(s3, pr_info.number, cancel_sync=True) + else: + assert False, "BUG! Not supported scenario" ### print results _print_results(result, args.outfile, args.pretty) diff --git a/tests/ci/ci_metadata.py b/tests/ci/ci_metadata.py index 82d44cf1adc..a767d102811 100644 --- a/tests/ci/ci_metadata.py +++ b/tests/ci/ci_metadata.py @@ -4,9 +4,13 @@ from typing import Optional from env_helper import ( S3_BUILDS_BUCKET, TEMP_PATH, + GITHUB_UPSTREAM_REPOSITORY, + GITHUB_REPOSITORY, + S3_BUILDS_BUCKET_PUBLIC, ) from s3_helper import S3Helper from ci_utils import GHActions +from synchronizer_utils import SYNC_BRANCH_PREFIX # pylint: disable=too-many-lines @@ -22,13 +26,14 @@ class CiMetadata: _LOCAL_PATH = Path(TEMP_PATH) / "ci_meta" _FILE_SUFFIX = ".cimd" _FILENAME_RUN_ID = "run_id" + _FILE_SUFFIX + _FILENAME_SYNC_PR_RUN_ID = "sync_pr_run_id" + _FILE_SUFFIX def __init__( self, s3: S3Helper, pr_number: Optional[int] = None, - sha: Optional[str] = None, git_ref: Optional[str] = None, + sha: Optional[str] = None, ): assert pr_number or (sha and git_ref) @@ -37,12 +42,25 @@ class CiMetadata: self.git_ref = git_ref self.s3 = s3 self.run_id = 0 + self.upstream_pr_number = 0 + self.sync_pr_run_id = 0 if self.pr_number: self.s3_path = f"{self._S3_PREFIX}/PRs/{self.pr_number}/" else: self.s3_path = f"{self._S3_PREFIX}/{self.git_ref}/{self.sha}/" + # Process upstream StatusNames.SYNC: + # metadata path for upstream pr + self.s3_path_upstream = "" + if ( + self.git_ref + and self.git_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + self.upstream_pr_number = int(self.git_ref.split("/pr/", maxsplit=1)[1]) + self.s3_path_upstream = f"{self._S3_PREFIX}/PRs/{self.upstream_pr_number}/" + self._updated = False if not self._LOCAL_PATH.exists(): @@ -73,6 +91,8 @@ class CiMetadata: assert len(lines) == 1 if file_name.name == self._FILENAME_RUN_ID: self.run_id = int(lines[0]) + elif file_name.name == self._FILENAME_SYNC_PR_RUN_ID: + self.sync_pr_run_id = int(lines[0]) self._updated = True return self @@ -84,8 +104,15 @@ class CiMetadata: Uploads meta on s3 """ assert self.run_id + assert self.git_ref, "Push meta only with full info" + + if not self.upstream_pr_number: + log_title = f"Storing workflow metadata: PR [{self.pr_number}]" + else: + log_title = f"Storing workflow metadata: PR [{self.pr_number}], upstream PR [{self.upstream_pr_number}]" + GHActions.print_in_group( - f"Storing workflow metadata: PR [{self.pr_number}]", + log_title, [f"run_id: {self.run_id}"], ) @@ -96,9 +123,17 @@ class CiMetadata: _ = self.s3.upload_file( bucket=S3_BUILDS_BUCKET, file_path=local_file, - s3_path=self.s3_path + local_file.name, + s3_path=self.s3_path + self._FILENAME_RUN_ID, ) + if self.upstream_pr_number: + # store run id in upstream pr meta as well + _ = self.s3.upload_file( + bucket=S3_BUILDS_BUCKET_PUBLIC, + file_path=local_file, + s3_path=self.s3_path_upstream + self._FILENAME_SYNC_PR_RUN_ID, + ) + if __name__ == "__main__": # TEST: diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 9b9652d5bd3..64614ffa611 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -31,6 +31,7 @@ IMAGES_PATH = os.getenv("IMAGES_PATH", TEMP_PATH) REPO_COPY = os.getenv("REPO_COPY", GITHUB_WORKSPACE) RUNNER_TEMP = os.getenv("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp"))) S3_BUILDS_BUCKET = 
os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") +S3_BUILDS_BUCKET_PUBLIC = "clickhouse-builds" S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports") S3_URL = os.getenv("S3_URL", "https://s3.amazonaws.com") S3_DOWNLOAD = os.getenv("S3_DOWNLOAD", S3_URL) From 741e0aedab78a009840f6346e582c905bb80be17 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 16:53:11 +0000 Subject: [PATCH 575/651] Remove commented code. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 57 +---------------------- 1 file changed, 2 insertions(+), 55 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 3ccecac951d..2d34f1024d5 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -471,6 +471,7 @@ struct TableExpressionData return buffer.str(); } }; + class ExpressionsStack { public: @@ -2857,22 +2858,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(cons bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope) { - //const auto & identifier_bind_part = identifier_lookup.identifier.front(); return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr; - - // auto get_alias_name_to_node_map = [&]() -> const std::unordered_map & - // { - // if (identifier_lookup.isExpressionLookup()) - // return *scope.alias_name_to_expression_node; - // else if (identifier_lookup.isFunctionLookup()) - // return scope.alias_name_to_lambda_node; - - // return scope.alias_name_to_table_expression_node; - // }; - - // const auto & alias_name_to_node_map = get_alias_name_to_node_map(); - - // return alias_name_to_node_map.contains(identifier_bind_part); } /** Resolve identifier from scope aliases. @@ -2922,23 +2908,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier { const auto & identifier_bind_part = identifier_lookup.identifier.front(); - // auto get_alias_name_to_node_map = [&]() -> std::unordered_map & - // { - // if (identifier_lookup.isExpressionLookup()) - // return *scope.alias_name_to_expression_node; - // else if (identifier_lookup.isFunctionLookup()) - // return scope.alias_name_to_lambda_node; - - // return scope.alias_name_to_table_expression_node; - // }; - - // auto & alias_name_to_node_map = get_alias_name_to_node_map(); - // auto it = alias_name_to_node_map.find(identifier_bind_part); - - // if (it == alias_name_to_node_map.end()) - // return {}; - - auto it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); + auto * it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); if (it == nullptr) return {}; @@ -2988,20 +2958,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } alias_node = lookup_result.resolved_identifier; - - /** During collection of aliases if node is identifier and has alias, we cannot say if it is - * column or function node. Check QueryExpressionsAliasVisitor documentation for clarification. - * - * If we resolved identifier node as expression, we must remove identifier node alias from - * function alias map. - * If we resolved identifier node as function, we must remove identifier node alias from - * expression alias map. 
- */ - // if (identifier_lookup.isExpressionLookup()) - // scope.alises.alias_name_to_lambda_node.erase(identifier_bind_part); - // else if (identifier_lookup.isFunctionLookup()) - // scope.aliases.alias_name_to_expression_node->erase(identifier_bind_part); - scope.popExpressionNode(); } else if (node_type == QueryTreeNodeType::FUNCTION) @@ -4199,7 +4155,6 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook */ auto * alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); - //auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); if (alias_it && (*alias_it)->getNodeType() == QueryTreeNodeType::COLUMN) { const auto & column_node = (*alias_it)->as(); @@ -6395,17 +6350,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(projection_name_it->second); } - // if (resolved_identifier_node && !node_alias.empty()) - // scope.alias_name_to_lambda_node.erase(node_alias); - if (!resolved_identifier_node && allow_lambda_expression) - { resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::FUNCTION}, scope).resolved_identifier; - // if (resolved_identifier_node && !node_alias.empty()) - // scope.alias_name_to_expression_node->erase(node_alias); - } - if (!resolved_identifier_node && allow_table_expression) { resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier; From dab090e629afd3730457599d84e147bb512a1e81 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 17:14:06 +0000 Subject: [PATCH 576/651] Cosmetics, pt. VII (includes a move of all snowflake-related functions in one document) --- .../functions/type-conversion-functions.md | 140 ---------------- .../sql-reference/functions/uuid-functions.md | 155 +++++++++++++++++- 2 files changed, 149 insertions(+), 146 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ea08ffa50e7..bab92ff1e67 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1979,143 +1979,3 @@ Result: │ 2,"good" │ └───────────────────────────────────────────┘ ``` - -## snowflakeToDateTime - -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. - -**Syntax** - -``` sql -snowflakeToDateTime(value[, time_zone]) -``` - -**Arguments** - -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). - -**Returned value** - -- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value. 
- -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); -``` - -Result: - -```response - -┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:57:56 │ -└──────────────────────────────────────────────────────────────────┘ -``` - -## snowflakeToDateTime64 - -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. - -**Syntax** - -``` sql -snowflakeToDateTime64(value[, time_zone]) -``` - -**Arguments** - -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). - -**Returned value** - -- The timestamp component of `value` as a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) with scale = 3, i.e. millisecond precision. - -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); -``` - -Result: - -```response - -┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:58:19.841 │ -└────────────────────────────────────────────────────────────────────┘ -``` - -## dateTimeToSnowflake - -Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. - -**Syntax** - -``` sql -dateTimeToSnowflake(value) -``` - -**Arguments** - -- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). - -**Returned value** - -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. - -**Example** - -Query: - -``` sql -WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt); -``` - -Result: - -```response -┌─dateTimeToSnowflake(dt)─┐ -│ 1426860702823350272 │ -└─────────────────────────┘ -``` - -## dateTime64ToSnowflake - -Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. - -**Syntax** - -``` sql -dateTime64ToSnowflake(value) -``` - -**Arguments** - -- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). - -**Returned value** - -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. - -**Example** - -Query: - -``` sql -WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); -``` - -Result: - -```response -┌─dateTime64ToSnowflake(dt64)─┐ -│ 1426860704886947840 │ -└─────────────────────────────┘ -``` diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 80d7215b9ef..7c264450ef0 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -674,7 +674,7 @@ Result: └──────────────────────────────────────────────────────────────────────────────────────┘ ``` -## serverUUID() +## serverUUID Returns the random UUID generated during the first start of the ClickHouse server. 
The UUID is stored in file `uuid` in the ClickHouse server directory (e.g. `/var/lib/clickhouse/`) and retained between server restarts.
@@ -692,9 +692,9 @@ Type: [UUID](../data-types/uuid.md).
 
 ## generateSnowflakeID
 
-Generates a [Snowflake ID](https://github.com/twitter-archive/snowflake/tree/b3f6a3c6ca8e1b6847baa6ff42bf72201e2c2231).
+Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
 
-Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond.
+The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond.
 For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
 In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
 
@@ -756,11 +756,14 @@
 
 ## generateSnowflakeIDThreadMonotonic
 
-Generates a [Snowflake ID](https://github.com/twitter-archive/snowflake/tree/b3f6a3c6ca8e1b6847baa6ff42bf72201e2c2231).
+Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
 
-Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond.
+The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond.
+For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
+In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
 
-This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.
+This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotonicity across different simultaneous requests.
+Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.
 
 ```
 0                   1                   2                   3
@@ -816,6 +819,146 @@ SELECT generateSnowflakeIDThreadMonotonic(1), generateSnowflakeIDThreadMonotonic
 └───────────────────────────────────────┴───────────────────────────────────────┘
 ```
 
+## snowflakeToDateTime
+
+Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](/docs/en/sql-reference/data-types/datetime.md) format.
+
+**Syntax**
+
+``` sql
+snowflakeToDateTime(value[, time_zone])
+```
+
+**Arguments**
+
+- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
+- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
+
+**Returned value**
+
+- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value. 
+
+**Example**
+
+Query:
+
+``` sql
+SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
+```
+
+Result:
+
+```response
+
+┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
+│ 2021-08-15 10:57:56 │
+└──────────────────────────────────────────────────────────────────┘
+```
+
+## snowflakeToDateTime64
+
+Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format.
+
+**Syntax**
+
+``` sql
+snowflakeToDateTime64(value[, time_zone])
+```
+
+**Arguments**
+
+- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
+- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
+
+**Returned value**
+
+- The timestamp component of `value` as a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
+```
+
+Result:
+
+```response
+
+┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
+│ 2021-08-15 10:58:19.841 │
+└────────────────────────────────────────────────────────────────────┘
+```
+
+## dateTimeToSnowflake
+
+Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
+
+**Syntax**
+
+``` sql
+dateTimeToSnowflake(value)
+```
+
+**Arguments**
+
+- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md).
+
+**Returned value**
+
+- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time.
+
+**Example**
+
+Query:
+
+``` sql
+WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt);
+```
+
+Result:
+
+```response
+┌─dateTimeToSnowflake(dt)─┐
+│ 1426860702823350272 │
+└─────────────────────────┘
+```
+
+## dateTime64ToSnowflake
+
+Converts a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
+
+**Syntax**
+
+``` sql
+dateTime64ToSnowflake(value)
+```
+
+**Arguments**
+
+- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md).
+
+**Returned value**
+
+- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. 
+ +**Example** + +Query: + +``` sql +WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); +``` + +Result: + +```response +┌─dateTime64ToSnowflake(dt64)─┐ +│ 1426860704886947840 │ +└─────────────────────────────┘ +``` + ## See also - [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) From 5d82a94615ef8a9fb7c39787d0e2b191641cbcb8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 17:22:59 +0000 Subject: [PATCH 577/651] Revert generateSerialID --- src/Functions/generateSerialID.cpp | 167 ------------------ .../03129_serial_test_zookeeper.reference | 13 -- .../03129_serial_test_zookeeper.sql | 12 -- 3 files changed, 192 deletions(-) delete mode 100644 src/Functions/generateSerialID.cpp delete mode 100644 tests/queries/0_stateless/03129_serial_test_zookeeper.reference delete mode 100644 tests/queries/0_stateless/03129_serial_test_zookeeper.sql diff --git a/src/Functions/generateSerialID.cpp b/src/Functions/generateSerialID.cpp deleted file mode 100644 index db26d0d684b..00000000000 --- a/src/Functions/generateSerialID.cpp +++ /dev/null @@ -1,167 +0,0 @@ -#include "Common/Exception.h" -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int KEEPER_EXCEPTION; -} - -constexpr auto function_node_name = "/serial_ids/"; -constexpr size_t MAX_SERIES_NUMBER = 1000; // ? - -class FunctionSerial : public IFunction -{ -private: - mutable zkutil::ZooKeeperPtr zk; - ContextPtr context; - -public: - static constexpr auto name = "generateSerialID"; - - explicit FunctionSerial(ContextPtr context_) : context(context_) - { - if (context->hasZooKeeper()) { - zk = context->getZooKeeper(); - } - } - - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(std::move(context)); - } - - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 1; } - bool isStateful() const override { return true; } - bool isDeterministic() const override { return false; } - bool isDeterministicInScopeOfQuery() const override { return false; } - bool isSuitableForConstantFolding() const override { return false; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForNothing() const override { return false; } - bool canBeExecutedOnDefaultArguments() const override { return false; } - bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; } - bool hasInformationAboutMonotonicity() const override { return true; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - FunctionArgumentDescriptors mandatory_args{ - {"series identifier", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} - }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - if (zk == nullptr) - throw Exception(ErrorCodes::KEEPER_EXCEPTION, - "ZooKeeper is not configured for function {}", - getName()); - if (zk->expired()) - zk = context->getZooKeeper(); - - // slow? 
- if (zk->exists(function_node_name) && zk->getChildren(function_node_name).size() == MAX_SERIES_NUMBER) { - throw Exception(ErrorCodes::KEEPER_EXCEPTION, - "At most {} serial nodes can be created", - MAX_SERIES_NUMBER); - } - - auto col_res = ColumnVector::create(); - typename ColumnVector::Container & vec_to = col_res->getData(); - - vec_to.resize(input_rows_count); - - const auto & serial_path = function_node_name + arguments[0].column->getDataAt(0).toString(); - - /// CAS in ZooKeeper - /// `get` value and version, `trySet` new with version check - /// I didn't get how to do it with `multi` - - Int64 counter; - std::string counter_path = serial_path + "/counter"; - - // if serial name used first time - zk->createAncestors(counter_path); - zk->createIfNotExists(counter_path, "1"); - - Coordination::Stat stat; - while (true) - { - const String counter_string = zk->get(counter_path, &stat); - counter = std::stoll(counter_string); - String updated_counter = std::to_string(counter + input_rows_count); - const Coordination::Error err = zk->trySet(counter_path, updated_counter); - if (err == Coordination::Error::ZOK) - { - // CAS is done - break; - } - if (err != Coordination::Error::ZBADVERSION) - { - throw Exception(ErrorCodes::KEEPER_EXCEPTION, - "ZooKeeper trySet operation failed with unexpected error = {} in function {}", - err, getName()); - } - } - - // Make a result - for (auto & val : vec_to) - { - val = counter; - ++counter; - } - - return col_res; - } - -}; - -REGISTER_FUNCTION(Serial) -{ - factory.registerFunction(FunctionDocumentation - { - .description=R"( -Generates and returns sequential numbers starting from the previous counter value. -This function takes a constant string argument - a series identifier. -The server should be configured with a ZooKeeper. 
-)", - .syntax = "generateSerialID(identifier)", - .arguments{ - {"series identifier", "Series identifier (String or FixedString)"} - }, - .returned_value = "Sequential numbers of type Int64 starting from the previous counter value", - .examples{ - {"first call", "SELECT generateSerialID('id1')", R"( -┌─generateSerialID('id1')──┐ -│ 1 │ -└──────────────────────────┘)"}, - {"second call", "SELECT generateSerialID('id1')", R"( -┌─generateSerialID('id1')──┐ -│ 2 │ -└──────────────────────────┘)"}, - {"column call", "SELECT *, generateSerialID('id1') FROM test_table", R"( -┌─CounterID─┬─UserID─┬─ver─┬─generateSerialID('id1')──┐ -│ 1 │ 3 │ 3 │ 3 │ -│ 1 │ 1 │ 1 │ 4 │ -│ 1 │ 2 │ 2 │ 5 │ -│ 1 │ 5 │ 5 │ 6 │ -│ 1 │ 4 │ 4 │ 7 │ -└───────────┴────────┴─────┴──────────────────────────┘ - )"}}, - .categories{"Unique identifiers"} - }); -} - -} diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference deleted file mode 100644 index 479030db4be..00000000000 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference +++ /dev/null @@ -1,13 +0,0 @@ -1 -2 -1 -3 -4 -5 -6 -7 -1 1 -2 2 -3 3 -4 4 -5 5 diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql deleted file mode 100644 index 2bd60656259..00000000000 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql +++ /dev/null @@ -1,12 +0,0 @@ --- Tags: zookeeper - -SELECT generateSerialID('x'); -SELECT generateSerialID('x'); -SELECT generateSerialID('y'); -SELECT generateSerialID('x') FROM numbers(5); - -SELECT generateSerialID(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT generateSerialID('x', 'y'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT generateSerialID(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } - -SELECT generateSerialID('z'), generateSerialID('z') FROM numbers(5); From 12f60a4969acda49422aef5d5d6fc431a71109f7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 18:00:53 +0000 Subject: [PATCH 578/651] Cosmetics, pt. 
VIII --- src/Functions/generateSnowflakeID.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 4e61bd9fb1c..617693f017c 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -42,6 +42,13 @@ constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - /// max values constexpr uint64_t max_machine_seq_num = machine_seq_num_mask; +uint64_t getTimestamp() +{ + auto now = std::chrono::system_clock::now(); + auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); + return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); +} + uint64_t getMachineId() { UUID server_uuid = ServerUUID::get(); @@ -52,31 +59,24 @@ uint64_t getMachineId() return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count; } -uint64_t getTimestamp() -{ - auto now = std::chrono::system_clock::now(); - auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); - return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); -} - struct SnowflakeId { uint64_t timestamp; - uint64_t machind_id; + uint64_t machine_id; uint64_t machine_seq_num; }; SnowflakeId toSnowflakeId(uint64_t snowflake) { return {.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)), - .machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), + .machine_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), .machine_seq_num = (snowflake & machine_seq_num_mask)}; } uint64_t fromSnowflakeId(SnowflakeId components) { return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) | - components.machind_id << (machine_seq_num_bits_count) | + components.machine_id << (machine_seq_num_bits_count) | components.machine_seq_num); } @@ -93,7 +93,7 @@ struct SnowflakeIdRange SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count) { /// 1. `now` - SnowflakeId begin = {.timestamp = getTimestamp(), .machind_id = getMachineId(), .machine_seq_num = 0}; + SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = getMachineId(), .machine_seq_num = 0}; /// 2. `begin` if (begin.timestamp <= available.timestamp) @@ -111,7 +111,7 @@ SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t in else end.timestamp = begin.timestamp; - end.machind_id = begin.machind_id; + end.machine_id = begin.machine_id; end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask; return {begin, end}; From ae8ceaa35e0cb6804774881e05bccf07ab23aa19 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 18:38:30 +0000 Subject: [PATCH 579/651] Cosmetics, pt. 
IX and cached machineId computation --- src/Functions/generateSnowflakeID.cpp | 25 +++++++++++++------ .../03130_generateSnowflakeId.reference | 4 +-- .../0_stateless/03130_generateSnowflakeId.sql | 14 ++++++----- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 617693f017c..c3f7701a05a 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -49,7 +49,7 @@ uint64_t getTimestamp() return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); } -uint64_t getMachineId() +uint64_t getMachineIdImpl() { UUID server_uuid = ServerUUID::get(); /// hash into 64 bits @@ -59,6 +59,12 @@ uint64_t getMachineId() return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count; } +uint64_t getMachineId() +{ + static uint64_t machine_id = getMachineIdImpl(); + return machine_id; +} + struct SnowflakeId { uint64_t timestamp; @@ -106,7 +112,7 @@ SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t in SnowflakeId end; const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1); if (input_rows_count >= seq_nums_in_current_timestamp_left) - /// if sequence numbers in current timestamp is not enough for rows => update timestamp + /// if sequence numbers in current timestamp is not enough for rows --> depending on how many elements input_rows_count overflows, forward timestamp by at least 1 tick end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1); else end.timestamp = begin.timestamp; @@ -136,8 +142,8 @@ struct GlobalCounterPolicy range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count); } while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); - /// if `compare_exhange` failed => another thread updated `lowest_available_snowflake_id` and we should try again - /// completed => range of IDs [begin, end) is reserved, can return the beginning of the range + /// if CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try + /// else --> our thread reserved ID range [begin, end) and return the beginning of the range return range.begin; } @@ -200,18 +206,21 @@ public: vec_to.resize(input_rows_count); typename FillPolicy::Data data; - - /// get the begin of available snowflake ids range - SnowflakeId snowflake_id = data.reserveRange(input_rows_count); + SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range for (UInt64 & to_row : vec_to) { to_row = fromSnowflakeId(snowflake_id); - if (snowflake_id.machine_seq_num++ == max_machine_seq_num) + if (snowflake_id.machine_seq_num == max_machine_seq_num) { + /// handle overflow snowflake_id.machine_seq_num = 0; ++snowflake_id.timestamp; } + else + { + ++snowflake_id.machine_seq_num; + } } } diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index 8cdced96770..6ec0cafab16 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -1,11 +1,11 @@ --- generateSnowflakeID -- +-- generateSnowflakeID 1 1 0 0 1 100 --- generateSnowflakeIDThreadMonotonic -- +-- generateSnowflakeIDThreadMonotonic 1 1 100 diff --git 
a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index 3e994149d2b..903be5b786c 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -1,10 +1,11 @@ -SELECT '-- generateSnowflakeID --'; +SELECT '-- generateSnowflakeID'; + SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; -- check machine sequence number is zero SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero -SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -SELECT generateSnowflakeID() = generateSnowflakeID(1); -SELECT generateSnowflakeID(1) = generateSnowflakeID(1); +SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs +SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ +SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination SELECT generateSnowflakeID(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } @@ -15,7 +16,8 @@ FROM FROM numbers(100) ); -SELECT '-- generateSnowflakeIDThreadMonotonic --'; +SELECT '-- generateSnowflakeIDThreadMonotonic'; + SELECT bitShiftLeft(toUInt64(generateSnowflakeIDThreadMonotonic()), 52) = 0; -- check machine sequence number is zero SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeIDThreadMonotonic()), 63), 1) = 0; -- check first bit is zero @@ -26,4 +28,4 @@ FROM ( SELECT DISTINCT generateSnowflakeIDThreadMonotonic() FROM numbers(100) -); \ No newline at end of file +); From 40753ddefb0324d50bb8d455615da74828c7be76 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 21:10:40 +0200 Subject: [PATCH 580/651] Update hdfs test --- tests/integration/test_storage_hdfs/test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 6ee12a87ebf..eeffa8ed00b 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -326,7 +326,7 @@ def test_virtual_columns(started_cluster): hdfs_api.write_data("/file1", "1\n") hdfs_api.write_data("/file2", "2\n") hdfs_api.write_data("/file3", "3\n") - expected = "1\tfile1\t/file1\n2\tfile2\t/file2\n3\tfile3\t/file3\n" + expected = "1\tfile1\tfile1\n2\tfile2\tfile2\n3\tfile3\tfile3\n" assert ( node1.query( "select id, _file as file_name, _path as file_path from virtual_cols order by id" @@ -493,13 +493,13 @@ def test_hdfsCluster(started_cluster): actual = node1.query( "select id, _file as file_name, _path as file_path from hdfs('hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\t/test_hdfsCluster/file1\n2\tfile2\t/test_hdfsCluster/file2\n3\tfile3\t/test_hdfsCluster/file3\n" + expected = "1\tfile1\ttest_hdfsCluster/file1\n2\tfile2\ttest_hdfsCluster/file2\n3\tfile3\ttest_hdfsCluster/file3\n" assert actual == expected actual = node1.query( "select id, _file as file_name, _path as file_path from hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\t/test_hdfsCluster/file1\n2\tfile2\t/test_hdfsCluster/file2\n3\tfile3\t/test_hdfsCluster/file3\n" + expected = "1\tfile1\ttest_hdfsCluster/file1\n2\tfile2\ttest_hdfsCluster/file2\n3\tfile3\ttest_hdfsCluster/file3\n" assert actual == expected fs.delete(dir, recursive=True) @@ 
-665,7 +665,7 @@ def test_virtual_columns_2(started_cluster): node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") - assert result.strip() == "/parquet_2" + assert result.strip() == "parquet_2" table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" @@ -978,25 +978,25 @@ def test_read_subcolumns(started_cluster): f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert res == "2\t/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + assert res == "2\ttest_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" res = node.query( f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert res == "2\t/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + assert res == "2\ttest_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert res == "0\t/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + assert res == "0\ttest_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" ) - assert res == "42\t/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + assert res == "42\ttest_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" def test_union_schema_inference_mode(started_cluster): From bd15e1311a949753a234cfed9571600af78eb906 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 23 May 2024 22:35:21 +0200 Subject: [PATCH 581/651] CI: fix --- tests/ci/ci.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 68db08fbe96..4afd3f46f9d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1917,7 +1917,7 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No print( f"Canceling PR workflow run_id: [{wf_data.run_id}], pr: [{pr_number}]" ) - GitHub.cancel_wf(GITHUB_REPOSITORY, get_best_robot_token(), wf_data.run_id) + GitHub.cancel_wf(GITHUB_REPOSITORY, wf_data.run_id, get_best_robot_token()) else: if not wf_data.sync_pr_run_id: print("WARNING: Sync PR run id has not been found") @@ -1925,8 +1925,8 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No print(f"Canceling sync PR workflow run_id: [{wf_data.sync_pr_run_id}]") GitHub.cancel_wf( "ClickHouse/clickhouse-private", - get_best_robot_token(), wf_data.sync_pr_run_id, + get_best_robot_token(), ) From dac31fb92a80982ec0a98472485fa02c4b917c07 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 17:29:00 +0000 Subject: [PATCH 582/651] Include settings into query cache key --- src/Interpreters/Cache/QueryCache.cpp | 37 ++++++++-- src/Interpreters/Cache/QueryCache.h | 5 +- src/Interpreters/executeQuery.cpp | 4 +- .../02494_query_cache_key.reference | 6 ++ .../0_stateless/02494_query_cache_key.sql | 70 +++++++++++++++++++ .../02494_query_cache_use_database.reference | 2 - .../02494_query_cache_use_database.sql | 30 -------- 7 files changed, 113 insertions(+), 41 deletions(-) create mode 100644 
tests/queries/0_stateless/02494_query_cache_key.reference create mode 100644 tests/queries/0_stateless/02494_query_cache_key.sql delete mode 100644 tests/queries/0_stateless/02494_query_cache_use_database.reference delete mode 100644 tests/queries/0_stateless/02494_query_cache_use_database.sql diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 4b10bfd3dcd..a3fe8c2e779 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -126,6 +126,11 @@ bool astContainsSystemTables(ASTPtr ast, ContextPtr context) namespace { +bool isQueryCacheRelatedSetting(const String & setting_name) +{ + return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache"); +} + class RemoveQueryCacheSettingsMatcher { public: @@ -141,7 +146,7 @@ public: auto is_query_cache_related_setting = [](const auto & change) { - return change.name.starts_with("query_cache_") || change.name.ends_with("_query_cache"); + return isQueryCacheRelatedSetting(change.name); }; std::erase_if(set_clause->changes, is_query_cache_related_setting); @@ -177,11 +182,11 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast) return transformed_ast; } -IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database) +IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database, const Settings & settings) { ast = removeQueryCacheSettings(ast); - /// Hash the AST, it must consider aliases (issue #56258) + /// Hash the AST, we must consider aliases (issue #56258) SipHash hash; ast->updateTreeHash(hash, /*ignore_aliases=*/ false); @@ -189,6 +194,25 @@ IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database) /// tables (issue #64136) hash.update(current_database); + /// Finally, hash the (changed) settings as they might affect the query result (e.g. think of settings `additional_table_filters` and `limit`). + /// Note: allChanged() returns the settings in random order. Also, update()-s of the composite hash must be done in deterministic order. + /// Therefore, collect and sort the settings first, then hash them. 
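+    /// (Illustration, not normative: with `SET limit = 1` the loop below feeds the pair ("limit", "1")
+    /// into the hash, so an otherwise identical query without that setting hashes differently and
+    /// therefore occupies a separate entry in the query cache.)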
+    Settings::Range changed_settings = settings.allChanged();
+    std::vector<std::pair<String, String>> changed_settings_sorted; /// (name, value)
+    for (const auto & setting : changed_settings)
+    {
+        const String & name = setting.getName();
+        const String & value = setting.getValueString();
+        if (!isQueryCacheRelatedSetting(name)) /// see removeQueryCacheSettings() why this is a good idea
+            changed_settings_sorted.push_back({name, value});
+    }
+    std::sort(changed_settings_sorted.begin(), changed_settings_sorted.end(), [](auto & lhs, auto & rhs) { return lhs.first < rhs.first; });
+    for (const auto & setting : changed_settings_sorted)
+    {
+        hash.update(setting.first);
+        hash.update(setting.second);
+    }
+
     return getSipHash128AsPair(hash);
 }
 
@@ -204,12 +228,13 @@ String queryStringFromAST(ASTPtr ast)
 QueryCache::Key::Key(
     ASTPtr ast_,
     const String & current_database,
+    const Settings & settings,
     Block header_,
     std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
     bool is_shared_,
     std::chrono::time_point<std::chrono::system_clock> expires_at_,
     bool is_compressed_)
-    : ast_hash(calculateAstHash(ast_, current_database))
+    : ast_hash(calculateAstHash(ast_, current_database, settings))
     , header(header_)
     , user_id(user_id_)
     , current_user_roles(current_user_roles_)
@@ -220,8 +245,8 @@ QueryCache::Key::Key(
 {
 }
 
-QueryCache::Key::Key(ASTPtr ast_, const String & current_database, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
-    : QueryCache::Key(ast_, current_database, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
+QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
+    : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
 {
 }
 
diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h
index b5b6f477137..461197cac32 100644
--- a/src/Interpreters/Cache/QueryCache.h
+++ b/src/Interpreters/Cache/QueryCache.h
@@ -14,6 +14,8 @@
 namespace DB
 {
 
+struct Settings;
+
 /// Does AST contain non-deterministic functions like rand() and now()?
 bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context);
 
@@ -89,6 +91,7 @@ public:
         /// Ctor to construct a Key for writing into query cache.
         Key(ASTPtr ast_,
             const String & current_database,
+            const Settings & settings,
             Block header_,
             std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
             bool is_shared_,
             std::chrono::time_point<std::chrono::system_clock> expires_at_,
             bool is_compressed);
 
         /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). 
-        Key(ASTPtr ast_, const String & current_database, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
+        Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
 
         bool operator==(const Key & other) const;
     };
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 56f08dbb902..0b5f68f27f6 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -1101,7 +1101,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
         {
             if (can_use_query_cache && settings.enable_reads_from_query_cache)
             {
-                QueryCache::Key key(ast, context->getCurrentDatabase(), context->getUserID(), context->getCurrentRoles());
+                QueryCache::Key key(ast, context->getCurrentDatabase(), settings, context->getUserID(), context->getCurrentRoles());
                 QueryCache::Reader reader = query_cache->createReader(key);
                 if (reader.hasCacheEntryForKey())
                 {
@@ -1224,7 +1224,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
                     && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save))
                 {
                     QueryCache::Key key(
-                        ast, context->getCurrentDatabase(), res.pipeline.getHeader(),
+                        ast, context->getCurrentDatabase(), settings, res.pipeline.getHeader(),
                         context->getUserID(), context->getCurrentRoles(),
                         settings.query_cache_share_between_users,
                         std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl),
diff --git a/tests/queries/0_stateless/02494_query_cache_key.reference b/tests/queries/0_stateless/02494_query_cache_key.reference
new file mode 100644
index 00000000000..8f5b61192d5
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_key.reference
@@ -0,0 +1,6 @@
+Test (1)
+1
+2
+Test (2)
+4
+4
diff --git a/tests/queries/0_stateless/02494_query_cache_key.sql b/tests/queries/0_stateless/02494_query_cache_key.sql
new file mode 100644
index 00000000000..d8c68e0d267
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_key.sql
@@ -0,0 +1,70 @@
+-- Tags: no-parallel
+-- Tag no-parallel: Messes with internal cache
+
+-- Tests that the key of the query cache is not only formed by the query AST but also by
+-- (1) the current database (`USE db`, issue #64136),
+-- (2) the query settings
+
+
+SELECT 'Test (1)';
+
+SYSTEM DROP QUERY CACHE;
+
+DROP DATABASE IF EXISTS db1;
+DROP DATABASE IF EXISTS db2;
+
+CREATE DATABASE db1;
+CREATE DATABASE db2;
+
+CREATE TABLE db1.tab(a UInt64, PRIMARY KEY a);
+CREATE TABLE db2.tab(a UInt64, PRIMARY KEY a);
+
+INSERT INTO db1.tab values(1);
+INSERT INTO db2.tab values(2);
+
+USE db1;
+SELECT * FROM tab SETTINGS use_query_cache=1;
+
+USE db2;
+SELECT * FROM tab SETTINGS use_query_cache=1;
+
+DROP DATABASE db1;
+DROP DATABASE db2;
+
+SYSTEM DROP QUERY CACHE;
+
+
+SELECT 'Test (2)';
+
+-- test with query-level settings
+SELECT 1 SETTINGS use_query_cache = 1, limit = 1, use_skip_indexes = 0 Format Null;
+SELECT 1 SETTINGS use_query_cache = 1, use_skip_indexes = 0 Format Null;
+SELECT 1 SETTINGS use_query_cache = 1, use_skip_indexes = 1 Format Null;
+SELECT 1 SETTINGS use_query_cache = 1, max_block_size = 1 Format Null;
+
+-- The same query four times, each with different settings. This should yield four entries in the query cache. 
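+-- (All four statements share the same AST; only their changed settings differ, so each
+-- settings combination above must map to its own cache key.)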
+SELECT count(query) FROM system.query_cache;
+
+SYSTEM DROP QUERY CACHE;
+
+-- test with mixed session-level/query-level settings
+SET use_query_cache = 1;
+SET limit = 1;
+SELECT 1 SETTINGS use_skip_indexes = 0 Format Null;
+SET limit = default;
+SET use_skip_indexes = 0;
+SELECT 1 Format Null;
+SET use_skip_indexes = 1;
+SELECT 1 SETTINGS use_skip_indexes = 1 Format Null;
+SET use_skip_indexes = default;
+SET max_block_size = 1;
+SELECT 1 Format Null;
+SET max_block_size = default;
+
+SET use_query_cache = default;
+
+-- The same query four times, each with different settings. This should yield four entries in the query cache.
+SELECT count(query) FROM system.query_cache;
+
+SYSTEM DROP QUERY CACHE;
+
diff --git a/tests/queries/0_stateless/02494_query_cache_use_database.reference b/tests/queries/0_stateless/02494_query_cache_use_database.reference
deleted file mode 100644
index 1191247b6d9..00000000000
--- a/tests/queries/0_stateless/02494_query_cache_use_database.reference
+++ /dev/null
@@ -1,2 +0,0 @@
-1
-2
diff --git a/tests/queries/0_stateless/02494_query_cache_use_database.sql b/tests/queries/0_stateless/02494_query_cache_use_database.sql
deleted file mode 100644
index df560f82ebb..00000000000
--- a/tests/queries/0_stateless/02494_query_cache_use_database.sql
+++ /dev/null
@@ -1,30 +0,0 @@
--- Tags: no-parallel, no-fasttest
--- Tag no-fasttest: Depends on OpenSSL
--- Tag no-parallel: Messes with internal cache
-
--- Test for issue #64136
-
-SYSTEM DROP QUERY CACHE;
-
-DROP DATABASE IF EXISTS db1;
-DROP DATABASE IF EXISTS db2;
-
-CREATE DATABASE db1;
-CREATE DATABASE db2;
-
-CREATE TABLE db1.tab(a UInt64, PRIMARY KEY a);
-CREATE TABLE db2.tab(a UInt64, PRIMARY KEY a);
-
-INSERT INTO db1.tab values(1);
-INSERT INTO db2.tab values(2);
-
-USE db1;
-SELECT * FROM tab SETTINGS use_query_cache=1;
-
-USE db2;
-SELECT * FROM tab SETTINGS use_query_cache=1;
-
-DROP DATABASE db1;
-DROP DATABASE db2;
-
-SYSTEM DROP QUERY CACHE;

From 6e6e2944b56245cd5eefd14deb7dba7b8459b935 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 23 May 2024 21:26:33 +0000
Subject: [PATCH 583/651] Fix glitch in #62696

---
 src/Functions/FunctionHelpers.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp
index 3b057779ffe..d85bb0e7060 100644
--- a/src/Functions/FunctionHelpers.cpp
+++ b/src/Functions/FunctionHelpers.cpp
@@ -21,8 +21,6 @@ namespace ErrorCodes
 
 const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column)
 {
-    if (!column)
-        return {};
     if (!isColumnConst(*column))
         return {};
 

From 5710b5852f9e067fbcd8809196c9c403a8de43dc Mon Sep 17 00:00:00 2001
From: Nataly Merezhuk
Date: Thu, 23 May 2024 17:45:58 -0400
Subject: [PATCH 584/651] Adds note - file engine unavailable in ClickHouse Cloud.

---
 docs/en/engines/table-engines/special/file.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md
index fdf5242ba3b..0d422f64762 100644
--- a/docs/en/engines/table-engines/special/file.md
+++ b/docs/en/engines/table-engines/special/file.md
@@ -14,6 +14,10 @@ Usage scenarios:
 - Convert data from one format to another.
 - Updating data in ClickHouse via editing a file on a disk.
 
+:::note
+This engine is not currently available in ClickHouse Cloud; please [use the S3 table function instead](/docs/en/sql-reference/table-functions/s3.md). 
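+For example, a query of this shape can read the same data directly from S3 (illustrative only — substitute your own bucket URL and format):
+
+``` sql
+SELECT * FROM s3('https://my-bucket.s3.amazonaws.com/data/*.csv', 'CSVWithNames');
+```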
+::: + ## Usage in ClickHouse Server {#usage-in-clickhouse-server} ``` sql From 251010f109a538c770f830bc254e031924486c46 Mon Sep 17 00:00:00 2001 From: TTPO100AJIEX Date: Fri, 24 May 2024 02:14:26 +0300 Subject: [PATCH 585/651] Move protocol-server and inter-server management into separate classes Co-authored-by: Alex Koledaev --- programs/server/Server.cpp | 987 +----------------- programs/server/Server.h | 95 +- src/CMakeLists.txt | 1 + src/Server/ServersManager/IServersManager.cpp | 268 +++++ src/Server/ServersManager/IServersManager.h | 74 ++ .../ServersManager/InterServersManager.cpp | 327 ++++++ .../ServersManager/InterServersManager.h | 45 + .../ServersManager/ProtocolServersManager.cpp | 523 ++++++++++ .../ServersManager/ProtocolServersManager.h | 37 + 9 files changed, 1325 insertions(+), 1032 deletions(-) create mode 100644 src/Server/ServersManager/IServersManager.cpp create mode 100644 src/Server/ServersManager/IServersManager.h create mode 100644 src/Server/ServersManager/InterServersManager.cpp create mode 100644 src/Server/ServersManager/InterServersManager.h create mode 100644 src/Server/ServersManager/ProtocolServersManager.cpp create mode 100644 src/Server/ServersManager/ProtocolServersManager.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 223bc1f77e7..b62ae40924c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -6,8 +6,6 @@ #include #include #include -#include -#include #include #include #include @@ -44,11 +42,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -83,29 +79,19 @@ #include #include #include -#include #include "MetricsTransmitter.h" #include -#include -#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include #include #include #include -#include #include "config.h" #include @@ -119,19 +105,9 @@ #endif #if USE_SSL -# include # include #endif -#if USE_GRPC -# include -#endif - -#if USE_NURAFT -# include -# include -#endif - #if USE_JEMALLOC # include #endif @@ -159,18 +135,6 @@ namespace ProfileEvents { extern const Event MainConfigLoads; extern const Event ServerStartupMilliseconds; - extern const Event InterfaceNativeSendBytes; - extern const Event InterfaceNativeReceiveBytes; - extern const Event InterfaceHTTPSendBytes; - extern const Event InterfaceHTTPReceiveBytes; - extern const Event InterfacePrometheusSendBytes; - extern const Event InterfacePrometheusReceiveBytes; - extern const Event InterfaceInterserverSendBytes; - extern const Event InterfaceInterserverReceiveBytes; - extern const Event InterfaceMySQLSendBytes; - extern const Event InterfaceMySQLReceiveBytes; - extern const Event InterfacePostgreSQLSendBytes; - extern const Event InterfacePostgreSQLReceiveBytes; } namespace fs = std::filesystem; @@ -238,11 +202,9 @@ namespace DB namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; - extern const int SUPPORT_IS_DISABLED; extern const int ARGUMENT_OUT_OF_BOUND; extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; - extern const int NETWORK_ERROR; extern const int CORRUPTED_DATA; } @@ -257,115 +219,6 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } -Poco::Net::SocketAddress Server::socketBindListen( - const Poco::Util::AbstractConfiguration & config, - Poco::Net::ServerSocket & socket, - const std::string & host, - UInt16 port, - [[maybe_unused]] bool 
secure) const -{ - auto address = makeSocketAddress(host, port, &logger()); - socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); - /// If caller requests any available port from the OS, discover it after binding. - if (port == 0) - { - address = socket.address(); - LOG_DEBUG(&logger(), "Requested any available port (port == 0), actual port is {:d}", address.port()); - } - - socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); - - return address; -} - -Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) -{ - auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); - if (listen_hosts.empty()) - { - listen_hosts.emplace_back("::1"); - listen_hosts.emplace_back("127.0.0.1"); - } - return listen_hosts; -} - -Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) -{ - auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host"); - if (!interserver_listen_hosts.empty()) - return interserver_listen_hosts; - - /// Use more general restriction in case of emptiness - return getListenHosts(config); -} - -bool getListenTry(const Poco::Util::AbstractConfiguration & config) -{ - bool listen_try = config.getBool("listen_try", false); - if (!listen_try) - { - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - listen_try = - DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() && - std::none_of(protocols.begin(), protocols.end(), [&](const auto & protocol) - { - return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); - }); - } - return listen_try; -} - - -void Server::createServer( - Poco::Util::AbstractConfiguration & config, - const std::string & listen_host, - const char * port_name, - bool listen_try, - bool start_server, - std::vector & servers, - CreateServerFunc && func) const -{ - /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. - if (config.getString(port_name, "").empty()) - return; - - /// If we already have an active server for this listen_host/port_name, don't create it again - for (const auto & server : servers) - { - if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) - return; - } - - auto port = config.getInt(port_name); - try - { - servers.push_back(func(port)); - if (start_server) - { - servers.back().start(); - LOG_INFO(&logger(), "Listening for {}", servers.back().getDescription()); - } - global_context->registerServerPort(port_name, port); - } - catch (const Poco::Exception &) - { - if (listen_try) - { - LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " - "then consider to " - "specify not disabled IPv4 or IPv6 address to listen in element of configuration " - "file. Example for disabled IPv6: 0.0.0.0 ." 
- " Example for disabled IPv4: ::", - listen_host, port, getCurrentExceptionMessage(false)); - } - else - { - throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); - } - } -} - #if defined(OS_LINUX) namespace @@ -665,6 +518,7 @@ try ServerSettings server_settings; server_settings.loadSettingsFromConfig(config()); + Poco::ThreadPool server_pool(3, server_settings.max_connections); ASTAlterCommand::setFormatAlterCommandsWithParentheses(server_settings.format_alter_operations_with_parentheses); @@ -721,11 +575,6 @@ try CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); - Poco::ThreadPool server_pool(3, server_settings.max_connections); - std::mutex servers_lock; - std::vector servers; - std::vector servers_to_start_before_tables; - /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... */ @@ -775,6 +624,10 @@ try bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log"); + std::mutex servers_lock; + ProtocolServersManager servers(context(), &logger()); + InterServersManager servers_to_start_before_tables(context(), &logger()); + // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. @@ -806,32 +659,7 @@ try LOG_DEBUG(log, "Shut down storages."); - if (!servers_to_start_before_tables.empty()) - { - LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers_to_start_before_tables) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(log, "Closed all listening sockets."); - - if (current_connections > 0) - current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_INFO(log, "Closed connections to servers for tables. But {} remain. 
Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); - else - LOG_INFO(log, "Closed connections to servers for tables."); - } + servers_to_start_before_tables.stopServers(server_settings, servers_lock); global_context->shutdownKeeperDispatcher(); @@ -928,19 +756,13 @@ try server_settings.asynchronous_heavy_metrics_update_period_s, [&]() -> std::vector { - std::vector metrics; - std::lock_guard lock(servers_lock); - metrics.reserve(servers_to_start_before_tables.size() + servers.size()); - - for (const auto & server : servers_to_start_before_tables) - metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - - for (const auto & server : servers) - metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - return metrics; - } - ); + std::vector metrics1 = servers_to_start_before_tables.getMetrics(); + std::vector metrics2 = servers.getMetrics(); + metrics1.reserve(metrics1.size() + metrics2.size()); + metrics1.insert(metrics1.end(), std::make_move_iterator(metrics2.begin()), std::make_move_iterator(metrics2.end())); + return metrics1; + }); zkutil::validateZooKeeperConfig(config()); bool has_zookeeper = zkutil::hasZooKeeperConfig(config()); @@ -1588,7 +1410,8 @@ try if (global_context->isServerCompletelyStarted()) { std::lock_guard lock(servers_lock); - updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); + servers.updateServers(*config, *this, servers_lock, server_pool, async_metrics, latest_config); + servers_to_start_before_tables.updateServers(*config, *this, servers_lock, server_pool, async_metrics, latest_config); } } @@ -1635,141 +1458,17 @@ try /// Must be the last. latest_config = config; }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + /* already_loaded = */ false); /// Reload it right now (initial loading) - const auto listen_hosts = getListenHosts(config()); - const auto interserver_listen_hosts = getInterserverListenHosts(config()); - const auto listen_try = getListenTry(config()); - - if (config().has("keeper_server.server_id")) - { -#if USE_NURAFT - //// If we don't have configured connection probably someone trying to use clickhouse-server instead - //// of clickhouse-keeper, so start synchronously. - bool can_initialize_keeper_async = false; - - if (has_zookeeper) /// We have configured connection to some zookeeper cluster - { - /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start - /// synchronously. - can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); - } - /// Initialize keeper RAFT. 
- global_context->initializeKeeperDispatcher(can_initialize_keeper_async); - FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); - - auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & - { - return global_context->getConfigRef(); - }; - - for (const auto & listen_host : listen_hosts) - { - /// TCP Keeper - const char * port_name = "keeper_server.tcp_port"; - createServer( - config(), listen_host, port_name, listen_try, /* start_server: */ false, - servers_to_start_before_tables, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config(), socket, listen_host, port); - socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); - socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); - return ProtocolServerAdapter( - listen_host, - port_name, - "Keeper (tcp): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory( - config_getter, global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout.totalSeconds(), - global_context->getSettingsRef().send_timeout.totalSeconds(), - false), server_pool, socket)); - }); - - const char * secure_port_name = "keeper_server.tcp_port_secure"; - createServer( - config(), listen_host, secure_port_name, listen_try, /* start_server: */ false, - servers_to_start_before_tables, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); - socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); - return ProtocolServerAdapter( - listen_host, - secure_port_name, - "Keeper with secure protocol (tcp_secure): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory( - config_getter, global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout.totalSeconds(), - global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - - /// HTTP control endpoints - port_name = "keeper_server.http_control.port"; - createServer(config(), listen_host, port_name, listen_try, /* start_server: */ false, - servers_to_start_before_tables, - [&](UInt16 port) -> ProtocolServerAdapter - { - auto http_context = httpContext(); - Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(http_context->getReceiveTimeout()); - http_params->setKeepAliveTimeout(keep_alive_timeout); - - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config(), socket, listen_host, port); - socket.setReceiveTimeout(http_context->getReceiveTimeout()); - socket.setSendTimeout(http_context->getSendTimeout()); - return ProtocolServerAdapter( - listen_host, - port_name, - "HTTP Control: http://" + address.toString(), - std::make_unique( - 
std::move(http_context), - createKeeperHTTPControlMainHandlerFactory( - config_getter(), - global_context->getKeeperDispatcher(), - "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)); - }); - } -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); -#endif - - } - - { - std::lock_guard lock(servers_lock); - /// We should start interserver communications before (and more important shutdown after) tables. - /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. - /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can - /// communicate with zookeeper, execute merges, etc. - createInterserverServers( - config(), - interserver_listen_hosts, - listen_try, - server_pool, - async_metrics, - servers_to_start_before_tables, - /* start_servers= */ false); - - - for (auto & server : servers_to_start_before_tables) - { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); - } - } + servers_to_start_before_tables.createServers( + config(), + *this, + servers_lock, + server_pool, + async_metrics, + /* start_servers= */ false, + ServerType(ServerType::Type::QUERIES_ALL) + ); /// Initialize access storages. auto & access_control = global_context->getAccessControl(); @@ -1799,19 +1498,18 @@ try global_context->setStopServersCallback([&](const ServerType & server_type) { std::lock_guard lock(servers_lock); - stopServers(servers, server_type); + servers.stopServers(server_type); }); global_context->setStartServersCallback([&](const ServerType & server_type) { std::lock_guard lock(servers_lock); - createServers( + servers.createServers( config(), - listen_hosts, - listen_try, + *this, + servers_lock, server_pool, async_metrics, - servers, /* start_servers= */ true, server_type); }); @@ -2024,18 +1722,21 @@ try { std::lock_guard lock(servers_lock); - createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); + servers.createServers( + config(), + *this, + servers_lock, + server_pool, + async_metrics, + false, + ServerType(ServerType::Type::QUERIES_ALL)); if (servers.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " - "to configuration file.)"); + throw Exception( + ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' " + "or 'http_port' to configuration file.)"); } - if (servers.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " - "to configuration file.)"); - #if USE_SSL CertificateReloader::instance().tryLoad(config()); #endif @@ -2107,12 +1808,7 @@ try { std::lock_guard lock(servers_lock); - for (auto & server : servers) - { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); - } - + servers.startServers(); global_context->setServerCompletelyStarted(); LOG_INFO(log, "Ready for connections."); } @@ -2148,46 +1844,10 @@ try access_control.stopPeriodicReloading(); is_cancelled = true; - - LOG_DEBUG(log, "Waiting for current connections to close."); - - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - 
LOG_WARNING(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(log, "Closed all listening sockets."); - - /// Wait for unfinished backups and restores. - /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries - /// (because killAllQueries() will cancel all running backups/restores). - if (server_settings.shutdown_wait_backups_and_restores) - global_context->waitAllBackupsAndRestores(); - - /// Killing remaining queries. - if (!server_settings.shutdown_wait_unfinished_queries) - global_context->getProcessList().killAllQueries(); - - if (current_connections) - current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_WARNING(log, "Closed connections. But {} remain." - " Tip: To increase wait time add to config: 60", current_connections); - else - LOG_INFO(log, "Closed connections."); - + const auto remaining_connections = servers.stopServers(server_settings, servers_lock); dns_cache_updater.reset(); - if (current_connections) + if (remaining_connections) { /// There is no better way to force connections to close in Poco. /// Otherwise connection handlers will continue to live @@ -2221,561 +1881,4 @@ catch (...) return code ? code : -1; } -std::unique_ptr Server::buildProtocolStackFromConfig( - const Poco::Util::AbstractConfiguration & config, - const std::string & protocol, - Poco::Net::HTTPServerParams::Ptr http_params, - AsynchronousMetrics & async_metrics, - bool & is_secure) -{ - auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr - { - if (type == "tcp") - return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); - - if (type == "tls") -#if USE_SSL - return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - - if (type == "proxy1") - return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); - if (type == "mysql") - return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); - if (type == "postgres") - return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); - if (type == "http") - return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes) - ); - if (type == "prometheus") - return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes) - ); - if (type == "interserver") - return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), 
ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes) - ); - - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); - }; - - std::string conf_name = "protocols." + protocol; - std::string prefix = conf_name + "."; - std::unordered_set pset {conf_name}; - - auto stack = std::make_unique(*this, conf_name); - - while (true) - { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; - } - - stack->append(create_factory(type, conf_name)); - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } - - return stack; -} - -HTTPContextPtr Server::httpContext() const -{ - return std::make_shared(context()); -} - -void Server::createServers( - Poco::Util::AbstractConfiguration & config, - const Strings & listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers, - const ServerType & server_type) -{ - const Settings & settings = global_context->getSettingsRef(); - - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); - - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - - for (const auto & protocol : protocols) - { - if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) - continue; - - std::string prefix = "protocols." 
+ protocol + "."; - std::string port_name = prefix + "port"; - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - - if (!config.has(prefix + "port")) - continue; - - std::vector hosts; - if (config.has(prefix + "host")) - hosts.push_back(config.getString(prefix + "host")); - else - hosts = listen_hosts; - - for (const auto & host : hosts) - { - bool is_secure = false; - auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); - - if (stack->empty()) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); - - createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, host, port, is_secure); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - - return ProtocolServerAdapter( - host, - port_name.c_str(), - description + ": " + address.toString(), - std::make_unique( - stack.release(), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - } - - for (const auto & listen_host : listen_hosts) - { - const char * port_name; - - if (server_type.shouldStart(ServerType::Type::HTTP)) - { - /// HTTP - port_name = "http_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - - return ProtocolServerAdapter( - listen_host, - port_name, - "http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); - }); - } - - if (server_type.shouldStart(ServerType::Type::HTTPS)) - { - /// HTTPS - port_name = "https_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "https://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP)) - { - /// TCP - port_name = "tcp_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return 
ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) - { - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - port_name = "tcp_with_proxy_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp) with PROXY: " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) - { - /// TCP with SSL - port_name = "tcp_port_secure"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); - #endif - }); - } - - if (server_type.shouldStart(ServerType::Type::MYSQL)) - { - port_name = "mysql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "MySQL compatibility protocol: " + address.toString(), - std::make_unique(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) - { - port_name = "postgresql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return 
ProtocolServerAdapter( - listen_host, - port_name, - "PostgreSQL compatibility protocol: " + address.toString(), - std::make_unique(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); - }); - } - -#if USE_GRPC - if (server_type.shouldStart(ServerType::Type::GRPC)) - { - port_name = "grpc_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::SocketAddress server_address(listen_host, port); - return ProtocolServerAdapter( - listen_host, - port_name, - "gRPC protocol: " + server_address.toString(), - std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); - }); - } -#endif - if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) - { - /// Prometheus (if defined and not setup yet with http_port) - port_name = "prometheus.port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Prometheus: http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes)); - }); - } - } -} - -void Server::createInterserverServers( - Poco::Util::AbstractConfiguration & config, - const Strings & interserver_listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers, - const ServerType & server_type) -{ - const Settings & settings = global_context->getSettingsRef(); - - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); - - /// Now iterate over interserver_listen_hosts - for (const auto & interserver_listen_host : interserver_listen_hosts) - { - const char * port_name; - - if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) - { - /// Interserver IO HTTP - port_name = "interserver_http_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "replica communication (interserver): http://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); - }); - } - - if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) - { - port_name = "interserver_https_port"; - createServer(config, interserver_listen_host, 
port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "secure replica communication (interserver): https://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - } -} - -void Server::stopServers( - std::vector & servers, - const ServerType & server_type -) const -{ - LoggerRawPtr log = &logger(); - - /// Remove servers once all their connections are closed - auto check_server = [&log](const char prefix[], auto & server) - { - if (!server.isStopping()) - return false; - size_t current_connections = server.currentConnections(); - LOG_DEBUG(log, "Server {}{}: {} ({} connections)", - server.getDescription(), - prefix, - !current_connections ? "finished" : "waiting", - current_connections); - return !current_connections; - }; - - std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); - - for (auto & server : servers) - { - if (!server.isStopping()) - { - const std::string server_port_name = server.getPortName(); - - if (server_type.shouldStop(server_port_name)) - server.stop(); - } - } - - std::erase_if(servers, std::bind_front(check_server, "")); -} - -void Server::updateServers( - Poco::Util::AbstractConfiguration & config, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - std::vector & servers_to_start_before_tables) -{ - LoggerRawPtr log = &logger(); - - const auto listen_hosts = getListenHosts(config); - const auto interserver_listen_hosts = getInterserverListenHosts(config); - const auto listen_try = getListenTry(config); - - /// Remove servers once all their connections are closed - auto check_server = [&log](const char prefix[], auto & server) - { - if (!server.isStopping()) - return false; - size_t current_connections = server.currentConnections(); - LOG_DEBUG(log, "Server {}{}: {} ({} connections)", - server.getDescription(), - prefix, - !current_connections ? "finished" : "waiting", - current_connections); - return !current_connections; - }; - - std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); - - Poco::Util::AbstractConfiguration & previous_config = latest_config ? 
*latest_config : this->config(); - - std::vector all_servers; - all_servers.reserve(servers.size() + servers_to_start_before_tables.size()); - for (auto & server : servers) - all_servers.push_back(&server); - - for (auto & server : servers_to_start_before_tables) - all_servers.push_back(&server); - - for (auto * server : all_servers) - { - if (!server->isStopping()) - { - std::string port_name = server->getPortName(); - bool has_host = false; - bool is_http = false; - if (port_name.starts_with("protocols.")) - { - std::string protocol = port_name.substr(0, port_name.find_last_of('.')); - has_host = config.has(protocol + ".host"); - - std::string conf_name = protocol; - std::string prefix = protocol + "."; - std::unordered_set pset {conf_name}; - while (true) - { - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "http") - { - is_http = true; - break; - } - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } - } - else - { - /// NOTE: better to compare using getPortName() over using - /// dynamic_cast<> since HTTPServer is also used for prometheus and - /// internal replication communications. - is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port"; - } - - if (!has_host) - has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end(); - bool has_port = !config.getString(port_name, "").empty(); - bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); - if (force_restart) - LOG_TRACE(log, " had been changed, will reload {}", server->getDescription()); - - if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart) - { - server->stop(); - LOG_INFO(log, "Stopped listening for {}", server->getDescription()); - } - } - } - - createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); - createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true); - - std::erase_if(servers, std::bind_front(check_server, "")); - std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, "")); -} - } diff --git a/programs/server/Server.h b/programs/server/Server.h index 3f03dd137ef..b4931ce53d1 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -1,15 +1,10 @@ #pragma once #include - #include -#include -#include -#include -#include /** Server provides three interfaces: - * 1. HTTP - simple interface for any applications. + * 1. HTTP, GRPC - simple interfaces for any applications. * 2. TCP - interface for native clickhouse-client and for server to server internal communications. * More rich and efficient, but less compatible * - data is transferred by columns; @@ -18,43 +13,21 @@ * 3. Interserver HTTP - for replication. 
*/ -namespace Poco -{ - namespace Net - { - class ServerSocket; - } -} - namespace DB { -class AsynchronousMetrics; -class ProtocolServerAdapter; class Server : public BaseDaemon, public IServer { public: using ServerApplication::run; - Poco::Util::LayeredConfiguration & config() const override - { - return BaseDaemon::config(); - } + Poco::Util::LayeredConfiguration & config() const override { return BaseDaemon::config(); } - Poco::Logger & logger() const override - { - return BaseDaemon::logger(); - } + Poco::Logger & logger() const override { return BaseDaemon::logger(); } - ContextMutablePtr context() const override - { - return global_context; - } + ContextMutablePtr context() const override { return global_context; } - bool isCancelled() const override - { - return BaseDaemon::isCancelled(); - } + bool isCancelled() const override { return BaseDaemon::isCancelled(); } void defineOptions(Poco::Util::OptionSet & _options) override; @@ -73,64 +46,6 @@ private: ContextMutablePtr global_context; /// Updated/recent config, to compare http_handlers ConfigurationPtr latest_config; - - HTTPContextPtr httpContext() const; - - Poco::Net::SocketAddress socketBindListen( - const Poco::Util::AbstractConfiguration & config, - Poco::Net::ServerSocket & socket, - const std::string & host, - UInt16 port, - [[maybe_unused]] bool secure = false) const; - - std::unique_ptr buildProtocolStackFromConfig( - const Poco::Util::AbstractConfiguration & config, - const std::string & protocol, - Poco::Net::HTTPServerParams::Ptr http_params, - AsynchronousMetrics & async_metrics, - bool & is_secure); - - using CreateServerFunc = std::function; - void createServer( - Poco::Util::AbstractConfiguration & config, - const std::string & listen_host, - const char * port_name, - bool listen_try, - bool start_server, - std::vector & servers, - CreateServerFunc && func) const; - - void createServers( - Poco::Util::AbstractConfiguration & config, - const Strings & listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers = false, - const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); - - void createInterserverServers( - Poco::Util::AbstractConfiguration & config, - const Strings & interserver_listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers = false, - const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); - - void updateServers( - Poco::Util::AbstractConfiguration & config, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - std::vector & servers_to_start_before_tables); - - void stopServers( - std::vector & servers, - const ServerType & server_type - ) const; }; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4e8946facda..826204111a0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -234,6 +234,7 @@ add_object_library(clickhouse_client Client) add_object_library(clickhouse_bridge BridgeHelper) add_object_library(clickhouse_server Server) add_object_library(clickhouse_server_http Server/HTTP) +add_object_library(clickhouse_server_manager Server/ServersManager) add_object_library(clickhouse_formats Formats) add_object_library(clickhouse_processors Processors) add_object_library(clickhouse_processors_executors Processors/Executors) diff --git a/src/Server/ServersManager/IServersManager.cpp 
b/src/Server/ServersManager/IServersManager.cpp new file mode 100644 index 00000000000..c903d90f766 --- /dev/null +++ b/src/Server/ServersManager/IServersManager.cpp @@ -0,0 +1,268 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int NETWORK_ERROR; +extern const int INVALID_CONFIG_PARAMETER; +} + +IServersManager::IServersManager(ContextMutablePtr l_global_context, Poco::Logger * l_logger) + : global_context(l_global_context), logger(l_logger) +{ +} + + +bool IServersManager::empty() const +{ + return servers.empty(); +} + +std::vector IServersManager::getMetrics() const +{ + std::vector metrics; + metrics.reserve(servers.size()); + for (const auto & server : servers) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + return metrics; +} + +void IServersManager::startServers() +{ + for (auto & server : servers) + { + server.start(); + LOG_INFO(logger, "Listening for {}", server.getDescription()); + } +} + +void IServersManager::stopServers(const ServerType & server_type) +{ + /// Remove servers once all their connections are closed + auto check_server = [&](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG( + logger, + "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); + + for (auto & server : servers) + { + if (!server.isStopping() && server_type.shouldStop(server.getPortName())) + server.stop(); + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + +void IServersManager::updateServers( + const Poco::Util::AbstractConfiguration & config, + IServer & iserver, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + ConfigurationPtr latest_config) +{ + stopServersForUpdate(config, latest_config); + createServers(config, iserver, servers_lock, server_pool, async_metrics, true, ServerType(ServerType::Type::QUERIES_ALL)); +} + +Poco::Net::SocketAddress IServersManager::socketBindListen( + const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const +{ + auto address = makeSocketAddress(host, port, logger); + socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); + /// If caller requests any available port from the OS, discover it after binding. + if (port == 0) + { + address = socket.address(); + LOG_DEBUG(logger, "Requested any available port (port == 0), actual port is {:d}", address.port()); + } + + socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); + return address; +} + +void IServersManager::createServer( + const Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + CreateServerFunc && func, + bool start_server) +{ + /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. 
+ if (config.getString(port_name, "").empty()) + return; + + /// If we already have an active server for this listen_host/port_name, don't create it again + for (const auto & server : servers) + { + if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) + return; + } + + auto port = config.getInt(port_name); + try + { + servers.push_back(func(port)); + if (start_server) + { + servers.back().start(); + LOG_INFO(logger, "Listening for {}", servers.back().getDescription()); + } + global_context->registerServerPort(port_name, port); + } + catch (const Poco::Exception &) + { + if (!getListenTry(config)) + { + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); + } + LOG_WARNING( + logger, + "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " + "specify not disabled IPv4 or IPv6 address to listen in element of configuration " + "file. Example for disabled IPv6: 0.0.0.0 ." + " Example for disabled IPv4: ::", + listen_host, + port, + getCurrentExceptionMessage(false)); + } +} + +void IServersManager::stopServersForUpdate(const Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config) +{ + /// Remove servers once all their connections are closed + auto check_server = [&](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG( + logger, + "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); + + const auto listen_hosts = getListenHosts(config); + const Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : config; + + for (auto & server : servers) + { + if (server.isStopping()) + return; + std::string port_name = server.getPortName(); + bool has_host = false; + bool is_http = false; + if (port_name.starts_with("protocols.")) + { + std::string protocol = port_name.substr(0, port_name.find_last_of('.')); + has_host = config.has(protocol + ".host"); + + std::string conf_name = protocol; + std::string prefix = protocol + "."; + std::unordered_set pset{conf_name}; + while (true) + { + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "http") + { + is_http = true; + break; + } + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + } + else + { + /// NOTE: better to compare using getPortName() over using + /// dynamic_cast<> since HTTPServer is also used for prometheus and + /// internal replication communications. 
+ is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; + } + + if (!has_host) + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + bool has_port = !config.getString(port_name, "").empty(); + bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); + if (force_restart) + LOG_TRACE(logger, " had been changed, will reload {}", server.getDescription()); + + if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart) + { + server.stop(); + LOG_INFO(logger, "Stopped listening for {}", server.getDescription()); + } + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + +Strings IServersManager::getListenHosts(const Poco::Util::AbstractConfiguration & config) const +{ + auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); + if (listen_hosts.empty()) + { + listen_hosts.emplace_back("::1"); + listen_hosts.emplace_back("127.0.0.1"); + } + return listen_hosts; +} + +bool IServersManager::getListenTry(const Poco::Util::AbstractConfiguration & config) const +{ + bool listen_try = config.getBool("listen_try", false); + if (!listen_try) + { + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + listen_try = DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() + && std::none_of( + protocols.begin(), + protocols.end(), + [&](const auto & protocol) + { return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); }); + } + return listen_try; +} + +} diff --git a/src/Server/ServersManager/IServersManager.h b/src/Server/ServersManager/IServersManager.h new file mode 100644 index 00000000000..5218ab63554 --- /dev/null +++ b/src/Server/ServersManager/IServersManager.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class IServersManager +{ +public: + IServersManager(ContextMutablePtr global_context, Poco::Logger * logger); + virtual ~IServersManager() = default; + + bool empty() const; + std::vector getMetrics() const; + + virtual void createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) + = 0; + + virtual void startServers(); + + virtual void stopServers(const ServerType & server_type); + virtual size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) = 0; + + virtual void updateServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + ConfigurationPtr latest_config); + +protected: + ContextMutablePtr global_context; + Poco::Logger * logger; + + std::vector servers; + + Poco::Net::SocketAddress socketBindListen( + const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const; + + using CreateServerFunc = std::function; + virtual void createServer( + const Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + CreateServerFunc && func, + bool start_server); + + virtual void stopServersForUpdate(const 
Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config); + + Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) const; + bool getListenTry(const Poco::Util::AbstractConfiguration & config) const; +}; + +} diff --git a/src/Server/ServersManager/InterServersManager.cpp b/src/Server/ServersManager/InterServersManager.cpp new file mode 100644 index 00000000000..28491a4f4f4 --- /dev/null +++ b/src/Server/ServersManager/InterServersManager.cpp @@ -0,0 +1,327 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if USE_SSL +# include +#endif + +#if USE_NURAFT +# include +# include +#endif + +namespace ProfileEvents +{ +extern const Event InterfaceInterserverSendBytes; +extern const Event InterfaceInterserverReceiveBytes; +} + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int SUPPORT_IS_DISABLED; +} + +void InterServersManager::createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) +{ + if (config.has("keeper_server.server_id")) + { +#if USE_NURAFT + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. + bool can_initialize_keeper_async = false; + + if (zkutil::hasZooKeeperConfig(config)) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. + can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } + /// Initialize keeper RAFT. 
+ global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); + + auto config_getter = [this]() -> const Poco::Util::AbstractConfiguration & { return global_context->getConfigRef(); }; + + for (const auto & listen_host : getListenHosts(config)) + { + /// TCP Keeper + constexpr auto port_name = "keeper_server.tcp_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout( + Poco::Timespan(config.getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout( + Poco::Timespan(config.getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); + return ProtocolServerAdapter( + listen_host, + port_name, + "Keeper (tcp): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory( + config_getter, + global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), + false), + server_pool, + socket)); + }, + /* start_server = */ false); + + constexpr auto secure_port_name = "keeper_server.tcp_port_secure"; + createServer( + config, + listen_host, + secure_port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { +# if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout( + Poco::Timespan(config.getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout( + Poco::Timespan(config.getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); + return ProtocolServerAdapter( + listen_host, + secure_port_name, + "Keeper with secure protocol (tcp_secure): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory( + config_getter, + global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), + true), + server_pool, + socket)); +# else + UNUSED(port); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +# endif + }, + /* start_server: */ false); + + /// HTTP control endpoints + createServer( + config, + listen_host, + /* port_name = */ "keeper_server.http_control.port", + [&](UInt16 port) -> ProtocolServerAdapter + { + auto http_context = std::make_shared(global_context); + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(http_context->getReceiveTimeout()); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(http_context->getReceiveTimeout()); + socket.setSendTimeout(http_context->getSendTimeout()); + return ProtocolServerAdapter( + listen_host, + port_name, + "HTTP Control: http://" + address.toString(), + std::make_unique( + std::move(http_context), + createKeeperHTTPControlMainHandlerFactory( + config_getter(), global_context->getKeeperDispatcher(), 
"KeeperHTTPControlHandler-factory"), + server_pool, + socket, + http_params)); + }, + /* start_server: */ false); + } +#else + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif + } + + { + std::lock_guard lock(servers_lock); + /// We should start interserver communications before (and more important shutdown after) tables. + /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. + /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can + /// communicate with zookeeper, execute merges, etc. + createInterserverServers(config, server, server_pool, async_metrics, start_servers, server_type); + startServers(); + } +} + +size_t InterServersManager::stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) +{ + if (servers.empty()) + { + return 0; + } + + LOG_DEBUG(logger, "Waiting for current connections to servers for tables to finish."); + + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_INFO(logger, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(logger, "Closed all listening sockets."); + + if (current_connections > 0) + current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_INFO( + logger, + "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections " + "after context shutdown.", + current_connections); + else + LOG_INFO(logger, "Closed connections to servers for tables."); + return current_connections; +} + +void InterServersManager::updateServers( + const Poco::Util::AbstractConfiguration & config, + IServer & iserver, + std::mutex & /*servers_lock*/, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + ConfigurationPtr latest_config) +{ + stopServersForUpdate(config, latest_config); + createInterserverServers(config, iserver, server_pool, async_metrics, true, ServerType(ServerType::Type::QUERIES_ALL)); +} + +Strings InterServersManager::getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) const +{ + auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host"); + if (!interserver_listen_hosts.empty()) + return interserver_listen_hosts; + + /// Use more general restriction in case of emptiness + return getListenHosts(config); +} + +void InterServersManager::createInterserverServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + + /// Now iterate over interserver_listen_hosts + for (const auto & interserver_listen_host : getInterserverListenHosts(config)) + { + if 
(server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) + { + /// Interserver IO HTTP + constexpr auto port_name = "interserver_http_port"; + createServer( + config, + interserver_listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) + { + constexpr auto port_name = "interserver_https_port"; + createServer( + config, + interserver_listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); +#else + UNUSED(port); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + }, + start_servers); + } + } +} + +} diff --git a/src/Server/ServersManager/InterServersManager.h b/src/Server/ServersManager/InterServersManager.h new file mode 100644 index 00000000000..2a389e28c22 --- /dev/null +++ b/src/Server/ServersManager/InterServersManager.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class InterServersManager : public IServersManager +{ +public: + using IServersManager::IServersManager; + + void createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) override; + + using IServersManager::stopServers; + size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; + + void updateServers( + const Poco::Util::AbstractConfiguration & config, + IServer & iserver, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + ConfigurationPtr latest_config) override; + +private: + Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) const; + + void createInterserverServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type); +}; + +} diff --git 
a/src/Server/ServersManager/ProtocolServersManager.cpp b/src/Server/ServersManager/ProtocolServersManager.cpp new file mode 100644 index 00000000000..17b028eddbb --- /dev/null +++ b/src/Server/ServersManager/ProtocolServersManager.cpp @@ -0,0 +1,523 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if USE_SSL +# include +#endif + +#if USE_GRPC +# include +#endif + +namespace ProfileEvents +{ +extern const Event InterfaceNativeSendBytes; +extern const Event InterfaceNativeReceiveBytes; +extern const Event InterfaceHTTPSendBytes; +extern const Event InterfaceHTTPReceiveBytes; +extern const Event InterfacePrometheusSendBytes; +extern const Event InterfacePrometheusReceiveBytes; +extern const Event InterfaceMySQLSendBytes; +extern const Event InterfaceMySQLReceiveBytes; +extern const Event InterfacePostgreSQLSendBytes; +extern const Event InterfacePostgreSQLReceiveBytes; +extern const Event InterfaceInterserverSendBytes; +extern const Event InterfaceInterserverReceiveBytes; +} + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int SUPPORT_IS_DISABLED; +extern const int INVALID_CONFIG_PARAMETER; +} + +void ProtocolServersManager::createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & /*servers_lock*/, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) +{ + auto listen_hosts = getListenHosts(config); + const Settings & settings = global_context->getSettingsRef(); + + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + + for (const auto & protocol : protocols) + { + if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + continue; + + std::string prefix = "protocols." 
+ protocol + "."; + std::string port_name = prefix + "port"; + std::string description{" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + + if (!config.has(prefix + "port")) + continue; + + std::vector hosts; + if (config.has(prefix + "host")) + hosts.push_back(config.getString(prefix + "host")); + else + hosts = listen_hosts; + + for (const auto & host : hosts) + { + bool is_secure = false; + auto stack = buildProtocolStackFromConfig(config, server, protocol, http_params, async_metrics, is_secure); + + if (stack->empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + + createServer( + config, + host, + port_name.c_str(), + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique(stack.release(), server_pool, socket, new Poco::Net::TCPServerParams)); + }, + start_servers); + } + } + + for (const auto & listen_host : listen_hosts) + { + if (server_type.shouldStart(ServerType::Type::HTTP)) + { + /// HTTP + constexpr auto port_name = "http_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "HTTPHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceHTTPReceiveBytes, + ProfileEvents::InterfaceHTTPSendBytes)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::HTTPS)) + { + /// HTTPS + constexpr auto port_name = "https_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "HTTPSHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceHTTPReceiveBytes, + ProfileEvents::InterfaceHTTPSendBytes)); +#else + UNUSED(port); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "HTTPS protocol is disabled because Poco library was built without NetSSL support."); +#endif + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::TCP)) + { + /// TCP + constexpr auto port_name = "tcp_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + 
listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory( + server, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) + { + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + constexpr auto port_name = "tcp_with_proxy_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory( + server, false, true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) + { + /// TCP with SSL + constexpr auto port_name = "tcp_port_secure"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory( + server, true, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); +#else + UNUSED(port); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::MYSQL)) + { + constexpr auto port_name = "mysql_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique( + new MySQLHandlerFactory( + server, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) + { + constexpr auto port_name = "postgresql_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique( + new 
PostgreSQLHandlerFactory( + server, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }, + start_servers); + } + +#if USE_GRPC + if (server_type.shouldStart(ServerType::Type::GRPC)) + { + constexpr auto port_name = "grpc_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(server, makeSocketAddress(listen_host, port, logger))); + }, + start_servers); + } +#endif + if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) + { + /// Prometheus (if defined and not setup yet with http_port) + constexpr auto port_name = "prometheus.port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "PrometheusHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfacePrometheusReceiveBytes, + ProfileEvents::InterfacePrometheusSendBytes)); + }, + start_servers); + } + } +} + +size_t ProtocolServersManager::stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) +{ + if (servers.empty()) + { + return 0; + } + + LOG_DEBUG(logger, "Waiting for current connections to close."); + + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_WARNING(logger, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(logger, "Closed all listening sockets."); + + /// Wait for unfinished backups and restores. + /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries + /// (because killAllQueries() will cancel all running backups/restores). + if (server_settings.shutdown_wait_backups_and_restores) + global_context->waitAllBackupsAndRestores(); + /// Killing remaining queries. + if (!server_settings.shutdown_wait_unfinished_queries) + global_context->getProcessList().killAllQueries(); + + if (current_connections) + current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_WARNING( + logger, + "Closed connections. But {} remain." 
+ " Tip: To increase wait time add to config: 60", + current_connections); + else + LOG_INFO(logger, "Closed connections."); + return current_connections; +} + +std::unique_ptr ProtocolServersManager::buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure) const +{ + auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr + { + if (type == "tcp") + return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory( + server, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); + + if (type == "tls") +#if USE_SSL + return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(server, conf_name)); +#else + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + + if (type == "proxy1") + return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(server, conf_name)); + if (type == "mysql") + return TCPServerConnectionFactory::Ptr( + new MySQLHandlerFactory(server, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); + if (type == "postgres") + return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory( + server, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); + if (type == "http") + return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( + std::make_shared(global_context), + http_params, + createHandlerFactory(server, config, async_metrics, "HTTPHandler-factory"), + ProfileEvents::InterfaceHTTPReceiveBytes, + ProfileEvents::InterfaceHTTPSendBytes)); + if (type == "prometheus") + return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( + std::make_shared(global_context), + http_params, + createHandlerFactory(server, config, async_metrics, "PrometheusHandler-factory"), + ProfileEvents::InterfacePrometheusReceiveBytes, + ProfileEvents::InterfacePrometheusSendBytes)); + if (type == "interserver") + return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( + std::make_shared(global_context), + http_params, + createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPHandler-factory"), + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); + + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); + }; + + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + std::unordered_set pset{conf_name}; + + auto stack = std::make_unique(server, conf_name); + + while (true) + { + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." 
+ config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + + return stack; +} + +} diff --git a/src/Server/ServersManager/ProtocolServersManager.h b/src/Server/ServersManager/ProtocolServersManager.h new file mode 100644 index 00000000000..e9eaaeb2184 --- /dev/null +++ b/src/Server/ServersManager/ProtocolServersManager.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class ProtocolServersManager : public IServersManager +{ +public: + using IServersManager::IServersManager; + + void createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) override; + + using IServersManager::stopServers; + size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; + +private: + std::unique_ptr buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure) const; +}; + +} From 27627f603fcfcd6df06bfb5210463c1fff8763c6 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 24 May 2024 03:04:36 +0000 Subject: [PATCH 586/651] fix --- .../0_stateless/02319_lightweight_delete_on_merge_tree.sql | 2 +- tests/queries/0_stateless/02792_drop_projection_lwd.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 050b8e37722..f82f79dbe44 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -102,7 +102,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()) SETTINGS INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError BAD_ARGUMENTS } +DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql index a1d8a9c90f3..dcde7dcc600 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.sql +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -7,7 +7,7 @@ CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); -- LWD does not work, as expected -DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError BAD_ARGUMENTS } +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError NOT_IMPLEMENTED } KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; -- drop projection From 029e2ea22624f067d546317faab02f189b143df8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 05:54:16 +0200 Subject: [PATCH 587/651] Standardize references to data type docs --- .../functions/arithmetic-functions.md | 32 +- .../functions/array-functions.md | 98 +++--- .../sql-reference/functions/bit-functions.md | 20 +- 
.../functions/bitmap-functions.md | 14 +- .../functions/date-time-functions.md | 226 ++++++------- .../functions/distance-functions.md | 78 ++--- .../functions/encoding-functions.md | 50 +-- .../functions/encryption-functions.md | 44 +-- .../functions/ext-dict-functions.md | 32 +- docs/en/sql-reference/functions/files.md | 2 +- .../functions/functions-for-nulls.md | 2 +- .../functions/geo/coordinates.md | 4 +- .../en/sql-reference/functions/geo/geohash.md | 12 +- docs/en/sql-reference/functions/geo/h3.md | 214 ++++++------- docs/en/sql-reference/functions/geo/s2.md | 72 ++--- .../sql-reference/functions/hash-functions.md | 302 +++++++++--------- docs/en/sql-reference/functions/index.md | 4 +- .../sql-reference/functions/introspection.md | 28 +- .../functions/ip-address-functions.md | 26 +- .../sql-reference/functions/json-functions.md | 50 +-- .../functions/logical-functions.md | 24 +- .../sql-reference/functions/math-functions.md | 136 ++++---- .../sql-reference/functions/nlp-functions.md | 18 +- .../functions/other-functions.md | 116 +++---- .../functions/random-functions.md | 34 +- .../functions/rounding-functions.md | 8 +- .../functions/splitting-merging-functions.md | 36 +-- .../functions/string-functions.md | 116 +++---- .../functions/string-replace-functions.md | 8 +- .../functions/string-search-functions.md | 96 +++--- .../functions/time-series-functions.md | 8 +- .../functions/time-window-functions.md | 10 +- .../functions/tuple-functions.md | 56 ++-- .../functions/tuple-map-functions.md | 76 ++--- .../functions/type-conversion-functions.md | 168 +++++----- .../sql-reference/functions/ulid-functions.md | 8 +- .../sql-reference/functions/url-functions.md | 32 +- .../sql-reference/functions/uuid-functions.md | 26 +- .../functions/ym-dict-functions.md | 6 +- 39 files changed, 1146 insertions(+), 1146 deletions(-) diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 6515ab6d702..e3fb1d91c05 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -77,7 +77,7 @@ Alias: `a * b` (operator) ## divide -Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function. +Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../data-types/float.md). Integer division is provided by the `intDiv` function. Division by 0 returns `inf`, `-inf`, or `nan`. @@ -172,8 +172,8 @@ ifNotFinite(x,y) **Arguments** -- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). +- `x` — Value to check for infinity. [Float\*](../data-types/float.md). +- `y` — Fallback value. [Float\*](../data-types/float.md). **Returned value** @@ -208,7 +208,7 @@ isNaN(x) Calculates the remainder of the division of two values `a` by `b`. -The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md). +The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../data-types/float.md). The remainder is computed like in C++. Truncated division is used for negative numbers. 
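As a quick sanity check of the truncated-division semantics (a sketch; the expected results assume the C++-style behavior described above, where the remainder takes the sign of the dividend):

```sql
SELECT modulo(-7, 3), modulo(7, -3);
-- expected: -1 and 1, because the quotient is truncated toward zero
```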
@@ -312,7 +312,7 @@ lcm(a, b) ## max2 -Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md). +Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../data-types/float.md). **Syntax** @@ -338,7 +338,7 @@ Result: ## min2 -Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md). +Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../data-types/float.md). **Syntax** @@ -364,7 +364,7 @@ Result: ## multiplyDecimal -Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). +Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../data-types/decimal.md). The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values. @@ -378,13 +378,13 @@ multiplyDecimal(a, b[, result_scale]) **Arguments** -- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). -- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). -- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `a` — First value. [Decimal](../data-types/decimal.md). +- `b` — Second value. [Decimal](../data-types/decimal.md). +- `result_scale` — Scale of result. [Int/UInt](../data-types/int-uint.md). **Returned value** -- The result of multiplication with given scale. [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of multiplication with given scale. [Decimal256](../data-types/decimal.md). **Example** @@ -438,7 +438,7 @@ Code: 407. DB::Exception: Received from localhost:9000. DB::Exception: Decimal m ## divideDecimal -Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). +Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../data-types/decimal.md). The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values. @@ -452,13 +452,13 @@ divideDecimal(a, b[, result_scale]) **Arguments** -- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). -- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). -- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `a` — First value: [Decimal](../data-types/decimal.md). +- `b` — Second value: [Decimal](../data-types/decimal.md). +- `result_scale` — Scale of result: [Int/UInt](../data-types/int-uint.md). **Returned value** -- The result of division with given scale. [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of division with given scale. [Decimal256](../data-types/decimal.md). **Example** diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index ff716804d97..7b52fbff714 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -19,7 +19,7 @@ empty([x]) An array is considered empty if it does not contain any elements. :::note -Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). 
With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. +Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. ::: The function also works for [strings](string-functions.md#empty) or [UUID](uuid-functions.md#empty). @@ -61,7 +61,7 @@ notEmpty([x]) An array is considered non-empty if it contains at least one element. :::note -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. ::: The function also works for [strings](string-functions.md#notempty) or [UUID](uuid-functions.md#notempty). @@ -96,7 +96,7 @@ Returns the number of items in the array. The result type is UInt64. The function also works for strings. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. Alias: `OCTET_LENGTH` @@ -577,7 +577,7 @@ arrayConcat(arrays) **Arguments** -- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. +- `arrays` – Arbitrary number of arguments of [Array](../data-types/array.md) type. **Example** @@ -1058,7 +1058,7 @@ arrayPushBack(array, single_value) **Arguments** - `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. 
For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -1083,7 +1083,7 @@ arrayPushFront(array, single_value) **Arguments** - `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -1179,12 +1179,12 @@ arrayShingles(array, length) **Arguments** -- `array` — Input array [Array](../../sql-reference/data-types/array.md). +- `array` — Input array [Array](../data-types/array.md). - `length` — The length of each shingle. **Returned value** -- An array of generated shingles. [Array](../../sql-reference/data-types/array.md). +- An array of generated shingles. [Array](../data-types/array.md). **Examples** @@ -1760,8 +1760,8 @@ arrayReduce(agg_func, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../data-types/string.md). +- `arr` — Any number of [array](../data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** @@ -1829,13 +1829,13 @@ arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. -- `arr` — Any number of [Array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../data-types/string.md). 
+- `ranges` — The ranges to aggregate, which should be an [array](../data-types/array.md) of [tuples](../data-types/tuple.md) containing the index and the length of each range. +- `arr` — Any number of [Array](../data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** -- Array containing results of the aggregate function over specified ranges. [Array](../../sql-reference/data-types/array.md). +- Array containing results of the aggregate function over specified ranges. [Array](../data-types/array.md). **Example** @@ -1948,7 +1948,7 @@ Alias: `flatten`. **Parameters** -- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Array](../data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. **Examples** @@ -1974,7 +1974,7 @@ arrayCompact(arr) **Arguments** -`arr` — The [array](../../sql-reference/data-types/array.md) to inspect. +`arr` — The [array](../data-types/array.md) to inspect. **Returned value** @@ -2008,13 +2008,13 @@ arrayZip(arr1, arr2, ..., arrN) **Arguments** -- `arrN` — [Array](../../sql-reference/data-types/array.md). +- `arrN` — [Array](../data-types/array.md). The function can take any number of arrays of different types. All the input arrays must be of equal size. **Returned value** -- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../../sql-reference/data-types/array.md). +- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../data-types/array.md). **Example** @@ -2364,8 +2364,8 @@ arrayMin([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** @@ -2421,8 +2421,8 @@ arrayMax([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** @@ -2478,8 +2478,8 @@ arraySum([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** @@ -2488,10 +2488,10 @@ arraySum([func,] arr) :::note Return type: -- For decimal numbers in the source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md). -- For floating point numbers — [Float64](../../sql-reference/data-types/float.md). -- For numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md). -- For numeric signed — [Int64](../../sql-reference/data-types/int-uint.md).
+- For decimal numbers in the source array (or for converted values, if `func` is specified) — [Decimal128](../data-types/decimal.md). +- For floating point numbers — [Float64](../data-types/float.md). +- For numeric unsigned — [UInt64](../data-types/int-uint.md). +- For numeric signed — [Int64](../data-types/int-uint.md). ::: **Examples** @@ -2540,12 +2540,12 @@ arrayAvg([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** -- The average of function values (or the array average). [Float64](../../sql-reference/data-types/float.md). +- The average of function values (or the array average). [Float64](../data-types/float.md). **Examples** @@ -2589,7 +2589,7 @@ arrayCumSum(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** @@ -2621,7 +2621,7 @@ arrayCumSumNonNegative(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** @@ -2641,7 +2641,7 @@ Note that the `arraySumNonNegative` is a [higher-order function](../../sql-refer ## arrayProduct -Multiplies elements of an [array](../../sql-reference/data-types/array.md). +Multiplies elements of an [array](../data-types/array.md). **Syntax** @@ -2651,11 +2651,11 @@ arrayProduct(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** -- A product of array's elements. [Float64](../../sql-reference/data-types/float.md). +- A product of array's elements. [Float64](../data-types/float.md). **Examples** @@ -2679,7 +2679,7 @@ Query: SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as res, toTypeName(res); ``` -Return value type is always [Float64](../../sql-reference/data-types/float.md). Result: +Return value type is always [Float64](../data-types/float.md). Result: ``` text ┌─res─┬─toTypeName(arrayProduct(array(toDecimal64(1, 8), toDecimal64(2, 8), toDecimal64(3, 8))))─┐ @@ -2689,7 +2689,7 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md). ## arrayRotateLeft -Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +Rotates an [array](../data-types/array.md) to the left by the specified number of elements. If the number of elements is negative, the array is rotated to the right. **Syntax** @@ -2700,12 +2700,12 @@ arrayRotateLeft(arr, n) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to rotate. **Returned value** -- An array rotated to the left by the specified number of elements. [Array](../../sql-reference/data-types/array.md). +- An array rotated to the left by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2753,7 +2753,7 @@ Result: ## arrayRotateRight -Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +Rotates an [array](../data-types/array.md) to the right by the specified number of elements. 
If the number of elements is negative, the array is rotated to the left. **Syntax** @@ -2764,12 +2764,12 @@ arrayRotateRight(arr, n) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to rotate. **Returned value** -- An array rotated to the right by the specified number of elements. [Array](../../sql-reference/data-types/array.md). +- An array rotated to the right by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2817,7 +2817,7 @@ Result: ## arrayShiftLeft -Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +Shifts an [array](../data-types/array.md) to the left by the specified number of elements. New elements are filled with the provided argument or the default value of the array element type. If the number of elements is negative, the array is shifted to the right. @@ -2829,13 +2829,13 @@ arrayShiftLeft(arr, n[, default]) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to shift. - `default` — Optional. Default value for new elements. **Returned value** -- An array shifted to the left by the specified number of elements. [Array](../../sql-reference/data-types/array.md). +- An array shifted to the left by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2911,7 +2911,7 @@ Result: ## arrayShiftRight -Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +Shifts an [array](../data-types/array.md) to the right by the specified number of elements. New elements are filled with the provided argument or the default value of the array element type. If the number of elements is negative, the array is shifted to the left. @@ -2923,13 +2923,13 @@ arrayShiftRight(arr, n[, default]) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to shift. - `default` — Optional. Default value for new elements. **Returned value** -- An array shifted to the right by the specified number of elements. [Array](../../sql-reference/data-types/array.md). +- An array shifted to the right by the specified number of elements. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 2538ad32022..a48893b93bf 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -34,8 +34,8 @@ bitShiftLeft(a, b) **Arguments** -- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../data-types/int-uint.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** @@ -81,8 +81,8 @@ bitShiftRight(a, b) **Arguments** -- `a` — A value to shift. 
[Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../data-types/int-uint.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** @@ -131,13 +131,13 @@ bitSlice(s, offset[, length]) **Arguments** -- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s` — s is [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1. - `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right. **Returned value** -- The substring. [String](../../sql-reference/data-types/string.md) +- The substring. [String](../data-types/string.md) **Example** @@ -362,7 +362,7 @@ bitCount(x) **Arguments** -- `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. +- `x` — [Integer](../data-types/int-uint.md) or [floating-point](../data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. **Returned value** @@ -402,12 +402,12 @@ bitHammingDistance(int1, int2) **Arguments** -- `int1` — First integer value. [Int64](../../sql-reference/data-types/int-uint.md). -- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md). +- `int1` — First integer value. [Int64](../data-types/int-uint.md). +- `int2` — Second integer value. [Int64](../data-types/int-uint.md). **Returned value** -- The Hamming distance. [UInt8](../../sql-reference/data-types/int-uint.md). +- The Hamming distance. [UInt8](../data-types/int-uint.md). **Examples** diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index e546de039da..a5c8a663b71 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -75,8 +75,8 @@ bitmapSubsetInRange(bitmap, range_start, range_end) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). [UInt32](../../sql-reference/data-types/int-uint.md). -- `range_end` – End of the range (exclusive). [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). +- `range_end` – End of the range (exclusive). [UInt32](../data-types/int-uint.md). 
**Example** @@ -105,8 +105,8 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). +- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md). **Example** @@ -135,8 +135,8 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** - `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild). -- `offset` – The position of the first element of the subset. [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../../sql-reference/data-types/int-uint.md). +- `offset` – The position of the first element of the subset. [UInt32](../data-types/int-uint.md). +- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md). **Example** @@ -163,7 +163,7 @@ bitmapContains(bitmap, needle) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `needle` – Searched bit value. [UInt32](../../sql-reference/data-types/int-uint.md). +- `needle` – Searched bit value. [UInt32](../data-types/int-uint.md). **Returned values** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index a1d6dbb5930..6ad26f452ad 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -26,7 +26,7 @@ SELECT ## makeDate -Creates a [Date](../../sql-reference/data-types/date.md) +Creates a [Date](../data-types/date.md) - from a year, month and day argument, or - from a year and day of year argument. @@ -43,14 +43,14 @@ Alias: **Arguments** -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `year` — Year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `month` — Month. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day` — Day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day_of_year` — Day of the year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- A date created from the arguments. [Date](../../sql-reference/data-types/date.md). +- A date created from the arguments. [Date](../data-types/date.md). 
**Example** @@ -83,11 +83,11 @@ Result: ``` ## makeDate32 -Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md). +Like [makeDate](#makeDate) but produces a [Date32](../data-types/date32.md). ## makeDateTime -Creates a [DateTime](../../sql-reference/data-types/datetime.md) from a year, month, day, hour, minute and second argument. +Creates a [DateTime](../data-types/datetime.md) from a year, month, day, hour, minute and second argument. **Syntax** @@ -97,17 +97,17 @@ makeDateTime(year, month, day, hour, minute, second[, timezone]) **Arguments** -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `year` — Year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `month` — Month. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day` — Day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `hour` — Hour. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `minute` — Minute. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `second` — Second. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). - `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). **Returned value** -- A date with time created from the arguments. [DateTime](../../sql-reference/data-types/datetime.md). +- A date with time created from the arguments. [DateTime](../data-types/datetime.md). **Example** @@ -125,7 +125,7 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). +Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). **Syntax** @@ -135,7 +135,7 @@ makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, t ## timestamp -Converts the first argument 'expr' to type [DateTime64(6)](../../sql-reference/data-types/datetime64.md). +Converts the first argument 'expr' to type [DateTime64(6)](../data-types/datetime64.md). If a second argument 'expr_time' is provided, it adds the specified time to the converted value. 
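For instance, a minimal sketch of the two-argument form (the formal syntax follows below):

```sql
SELECT timestamp('2023-12-31 23:59:59', '00:00:01') AS ts;
-- adding one second should roll over to 2024-01-01 00:00:00.000000, returned as DateTime64(6)
```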
**Syntax** @@ -148,8 +148,8 @@ Alias: `TIMESTAMP` **Arguments** -- `expr` - Date or date with time. [String](../../sql-reference/data-types/string.md). -- `expr_time` - Optional parameter. Time to add. [String](../../sql-reference/data-types/string.md). +- `expr` - Date or date with time. [String](../data-types/string.md). +- `expr_time` - Optional parameter. Time to add. [String](../data-types/string.md). **Examples** @@ -179,7 +179,7 @@ Result: **Returned value** -- [DateTime64](../../sql-reference/data-types/datetime64.md)(6) +- [DateTime64](../data-types/datetime64.md)(6) ## timeZone @@ -196,7 +196,7 @@ Alias: `timezone`. **Returned value** -- Timezone. [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../data-types/string.md). **Example** @@ -231,7 +231,7 @@ Alias: `serverTimezone`. **Returned value** -- Timezone. [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../data-types/string.md). **Example** @@ -265,12 +265,12 @@ Alias: `toTimezone`. **Arguments** -- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). +- `value` — Time or date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — Timezone for the returned value. [String](../data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). **Returned value** -- Date and time. [DateTime](../../sql-reference/data-types/datetime.md). +- Date and time. [DateTime](../data-types/datetime.md). **Example** @@ -310,7 +310,7 @@ int32samoa: 1546300800 ## timeZoneOf -Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. +Returns the timezone name of [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) data types. **Syntax** @@ -322,11 +322,11 @@ Alias: `timezoneOf`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- Timezone name. [String](../../sql-reference/data-types/string.md). +- Timezone name. [String](../data-types/string.md). **Example** @@ -357,11 +357,11 @@ Alias: `timezoneOffset`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- Offset from UTC in seconds. [Int32](../../sql-reference/data-types/int-uint.md). +- Offset from UTC in seconds. [Int32](../data-types/int-uint.md). **Example** @@ -1192,12 +1192,12 @@ toStartOfSecond(value, [timezone]) **Arguments** -- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. 
[String](../../sql-reference/data-types/string.md).
+- `value` — Date and time. [DateTime64](../data-types/datetime64.md).
+- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md).

**Returned value**

-- Input value without sub-seconds. [DateTime64](../../sql-reference/data-types/datetime64.md).
+- Input value without sub-seconds. [DateTime64](../data-types/datetime64.md).

**Examples**

@@ -1534,12 +1534,12 @@ Alias: `TO_DAYS`

**Arguments**

-- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
-- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md)
+- `date` — The date to calculate the number of days passed since year zero from. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
+- `time_zone` — A String type const value or an expression representing the time zone. [String types](../data-types/string.md).

**Returned value**

-The number of days passed since date 0000-01-01. [UInt32](../../sql-reference/data-types/int-uint.md).
+The number of days passed since date 0000-01-01. [UInt32](../data-types/int-uint.md).

**Example**

@@ -1563,7 +1563,7 @@ Result:

Returns for a given number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) the corresponding date in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`FROM_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_from-days) function.

-The result is undefined if it cannot be represented within the bounds of the [Date](../../sql-reference/data-types/date.md) type.
+The result is undefined if it cannot be represented within the bounds of the [Date](../data-types/date.md) type.

**Syntax**

@@ -1579,7 +1579,7 @@ Alias: `FROM_DAYS`

**Returned value**

-The date corresponding to the number of days passed since year zero. [Date](../../sql-reference/data-types/date.md).
+The date corresponding to the number of days passed since year zero. [Date](../data-types/date.md).

**Example**

@@ -1601,7 +1601,7 @@ Result:

## fromDaysSinceYearZero32

-Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../../sql-reference/data-types/date32.md).
+Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../data-types/date32.md).

## age

@@ -1618,7 +1618,7 @@ age('unit', startdate, enddate, [timezone])

**Arguments**

-- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
+- `unit` — The type of interval for the result. [String](../data-types/string.md).

    Possible values:

    - `nanosecond`, `nanoseconds`, `ns`
    - `microsecond`, `microseconds`, `us`, `u`
    - `millisecond`, `milliseconds`, `ms`
    - `second`, `seconds`, `ss`, `s`
    - `minute`, `minutes`, `mi`, `n`
    - `hour`, `hours`, `hh`, `h`
    - `day`, `days`, `dd`, `d`
    - `week`, `weeks`, `wk`, `ww`
    - `month`, `months`, `mm`, `m`
    - `quarter`, `quarters`, `qq`, `q`
    - `year`, `years`, `yyyy`, `yy`

-- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `startdate` — The first time value to subtract (the subtrahend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).

-- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).

-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md).
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md).

**Returned value**

-Difference between `enddate` and `startdate` expressed in `unit`. [Int](../../sql-reference/data-types/int-uint.md).
+Difference between `enddate` and `startdate` expressed in `unit`. [Int](../data-types/int-uint.md).

**Example**

@@ -1694,7 +1694,7 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_

**Arguments**

-- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
+- `unit` — The type of interval for the result. [String](../data-types/string.md).

    Possible values:

    - `nanosecond`, `nanoseconds`, `ns`
    - `microsecond`, `microseconds`, `us`, `u`
    - `millisecond`, `milliseconds`, `ms`
    - `second`, `seconds`, `ss`, `s`
    - `minute`, `minutes`, `mi`, `n`
    - `hour`, `hours`, `hh`, `h`
    - `day`, `days`, `dd`, `d`
    - `week`, `weeks`, `wk`, `ww`
    - `month`, `months`, `mm`, `m`
    - `quarter`, `quarters`, `qq`, `q`
    - `year`, `years`, `yyyy`, `yy`

-- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `startdate` — The first time value to subtract (the subtrahend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).

-- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md). **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../data-types/int-uint.md). **Example** @@ -1781,12 +1781,12 @@ Alias: `dateTrunc`. `unit` argument is case-insensitive. -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). **Returned value** -- Value, truncated to the specified part of date. [DateTime](../../sql-reference/data-types/datetime.md). +- Value, truncated to the specified part of date. [DateTime](../data-types/datetime.md). **Example** @@ -1844,7 +1844,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Arguments** -- `unit` — The type of interval to add. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. +- `unit` — The type of interval to add. Note: This is not a [String](../data-types/string.md) and must therefore not be quoted. Possible values: - `second` @@ -1856,12 +1856,12 @@ Aliases: `dateAdd`, `DATE_ADD`. - `quarter` - `year` -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to add. [Int](../data-types/int-uint.md). +- `date` — The date or date with time to which `value` is added. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -1918,7 +1918,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Arguments** -- `unit` — The type of interval to subtract. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. +- `unit` — The type of interval to subtract. Note: This is not a [String](../data-types/string.md) and must therefore not be quoted. Possible values: @@ -1931,12 +1931,12 @@ Aliases: `dateSub`, `DATE_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to subtract. [Int](../data-types/int-uint.md). +- `date` — The date or date with time from which `value` is subtracted. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -1985,9 +1985,9 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `value` — Value of interval to add. [Int](../data-types/int-uint.md). +- `unit` — The type of interval to add. [String](../data-types/string.md). Possible values: - `second` @@ -2001,7 +2001,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Returned value** -Date or date with time with the specified `value` expressed in `unit` added to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time with the specified `value` expressed in `unit` added to `date`. 
[Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2033,7 +2033,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. **Arguments** -- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to subtract. [String](../data-types/string.md). Possible values: - `second` @@ -2045,12 +2045,12 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to subtract. [Int](../data-types/int-uint.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2080,12 +2080,12 @@ addDate(date, interval) **Arguments** -- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), or [String](../../sql-reference/data-types/string.md) -- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md). +- `date` — The date or date with time to which `interval` is added. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md), [DateTime64](../data-types/datetime64.md), or [String](../data-types/string.md) +- `interval` — Interval to add. [Interval](../data-types/special-data-types/interval.md). **Returned value** -Date or date with time obtained by adding `interval` to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `interval` to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2121,12 +2121,12 @@ subDate(date, interval) **Arguments** -- `date` — The date or date with time from which `interval` is subtracted. 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), or [String](../../sql-reference/data-types/string.md)
-- `interval` — Interval to subtract. [Interval](../../sql-reference/data-types/special-data-types/interval.md).
+- `date` — The date or date with time from which `interval` is subtracted. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md), [DateTime64](../data-types/datetime64.md), or [String](../data-types/string.md)
+- `interval` — Interval to subtract. [Interval](../data-types/special-data-types/interval.md).

**Returned value**

-Date or date with time obtained by subtracting `interval` from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+Date or date with time obtained by subtracting `interval` from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).

**Example**

@@ -2162,11 +2162,11 @@ now([timezone])

**Arguments**

-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).

**Returned value**

-- Current date and time. [DateTime](../../sql-reference/data-types/datetime.md).
+- Current date and time. [DateTime](../data-types/datetime.md).

**Example**

@@ -2211,11 +2211,11 @@ now64([scale], [timezone])

**Arguments**

- `scale` - Tick size (precision): 10^(-scale) seconds. Valid range: [ 0 : 9 ]. Typical values are 3 (milliseconds, the default), 6 (microseconds) and 9 (nanoseconds).
-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).

**Returned value**

-- Current date and time with sub-second precision. [DateTime64](../../sql-reference/data-types/datetime64.md).
+- Current date and time with sub-second precision. [DateTime64](../data-types/datetime64.md).

**Example**

@@ -2245,11 +2245,11 @@ nowInBlock([timezone])

**Arguments**

-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).

**Returned value**

-- Current date and time at the moment of processing of each block of data. [DateTime](../../sql-reference/data-types/datetime.md).
+- Current date and time at the moment of processing of each block of data. [DateTime](../data-types/datetime.md).

**Example**

@@ -2289,7 +2289,7 @@ today()

**Returned value**

-- Current date. [DateTime](../../sql-reference/data-types/datetime.md).
+- Current date. [Date](../data-types/date.md).

**Example**

@@ -2379,7 +2379,7 @@ Result:

## YYYYMMDDToDate

-Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md).
+Converts a number containing the year, month and day number to a [Date](../data-types/date.md).

This function is the opposite of function `toYYYYMMDD()`.

@@ -2393,11 +2393,11 @@ YYYYMMDDToDate(yyyymmdd);

**Arguments**

-- `yyyymmdd` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
+- `yyyymmdd` - A number representing the year, month and day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md).

**Returned value**

-- a date created from the arguments. [Date](../../sql-reference/data-types/date.md).
+- A date created from the arguments. [Date](../data-types/date.md).

**Example**

@@ -2415,11 +2415,11 @@ Result:

## YYYYMMDDToDate32

-Like function `YYYYMMDDToDate()` but produces a [Date32](../../sql-reference/data-types/date32.md).
+Like function `YYYYMMDDToDate()` but produces a [Date32](../data-types/date32.md).

## YYYYMMDDhhmmssToDateTime

-Converts a number containing the year, month, day, hours, minute and second number to a [DateTime](../../sql-reference/data-types/datetime.md).
+Converts a number containing the year, month, day, hour, minute and second to a [DateTime](../data-types/datetime.md).

The output is undefined if the input does not encode a valid DateTime value.

@@ -2433,12 +2433,12 @@ YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]);

**Arguments**

-- `yyyymmddhhmmss` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
+- `yyyymmddhhmmss` - A number representing the year, month, day, hour, minute and second. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md).
- `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional).

**Returned value**

-- a date with time created from the arguments. [DateTime](../../sql-reference/data-types/datetime.md).
+- A date with time created from the arguments. [DateTime](../data-types/datetime.md).

**Example**

@@ -2456,7 +2456,7 @@ Result:

## YYYYMMDDhhmmssToDateTime64

-Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../../sql-reference/data-types/datetime64.md).
+Like function `YYYYMMDDhhmmssToDateTime()` but produces a [DateTime64](../data-types/datetime64.md).

Accepts an additional, optional `precision` parameter after the `timezone` parameter.

@@ -3453,7 +3453,7 @@ Formats a Time according to the given Format string. Format is a constant expres

formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.

-The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime).
+The opposite operation of this function is [parseDateTime](../functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). Alias: `DATE_FORMAT`. @@ -3579,7 +3579,7 @@ LIMIT 10 Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. -The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). +The opposite operation of this function is [parseDateTimeInJodaSyntax](../functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). **Replacement fields** @@ -3639,13 +3639,13 @@ dateName(date_part, date) **Arguments** -- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../data-types/string.md). +- `date` — Date. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `timezone` — Timezone. Optional. [String](../data-types/string.md). **Returned value** -- The specified part of date. [String](../../sql-reference/data-types/string.md#string) +- The specified part of date. [String](../data-types/string.md#string) **Example** @@ -3677,11 +3677,11 @@ monthName(date) **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- The name of the month. [String](../../sql-reference/data-types/string.md#string) +- The name of the month. [String](../data-types/string.md#string) **Example** @@ -3704,7 +3704,7 @@ This function converts a Unix timestamp to a calendar date and a time of a day. It can be called in two ways: -When given a single argument of type [Integer](../../sql-reference/data-types/int-uint.md), it returns a value of type [DateTime](../../sql-reference/data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime). +When given a single argument of type [Integer](../data-types/int-uint.md), it returns a value of type [DateTime](../data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime). Alias: `FROM_UNIXTIME`. 
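A minimal sketch of the single-argument form (the rendered value assumes a UTC session timezone):

```sql
SELECT fromUnixTimestamp(0) AS dt;
-- 1970-01-01 00:00:00 when the session timezone is UTC
```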
@@ -3722,7 +3722,7 @@ Result: └──────────────────────────────┘ ``` -When given two or three arguments where the first argument is a value of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../../sql-reference/data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used. +When given two or three arguments where the first argument is a value of type [Integer](../data-types/int-uint.md), [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used. **Example:** @@ -3772,11 +3772,11 @@ toModifiedJulianDay(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. [Int32](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Int32](../data-types/int-uint.md). **Example** @@ -3804,11 +3804,11 @@ toModifiedJulianDayOrNull(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Nullable(Int32)](../data-types/int-uint.md). **Example** @@ -3836,11 +3836,11 @@ fromModifiedJulianDay(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../data-types/int-uint.md). **Returned value** -- Date in text form. [String](../../sql-reference/data-types/string.md) +- Date in text form. [String](../data-types/string.md) **Example** @@ -3868,11 +3868,11 @@ fromModifiedJulianDayOrNull(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../data-types/int-uint.md). **Returned value** -- Date in text form. [Nullable(String)](../../sql-reference/data-types/string.md) +- Date in text form. [Nullable(String)](../data-types/string.md) **Example** @@ -3900,8 +3900,8 @@ toUTCTimestamp(time_val, time_zone) **Arguments** -- `time_val` — A DateTime/DateTime64 type const value or an expression . 
[DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
-- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md)
+- `time_val` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../data-types/datetime.md)
+- `time_zone` — A String type const value or an expression representing the time zone. [String types](../data-types/string.md)

**Returned value**

@@ -3933,8 +3933,8 @@ fromUTCTimestamp(time_val, time_zone)

**Arguments**

-- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
-- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md)
+- `time_val` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../data-types/datetime.md)
+- `time_zone` — A String type const value or an expression representing the time zone. [String types](../data-types/string.md)

**Returned value**

@@ -3965,8 +3965,8 @@ timeDiff(first_datetime, second_datetime)

**Arguments**

-- `first_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
-- `second_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
+- `first_datetime` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../data-types/datetime.md)
+- `second_datetime` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../data-types/datetime.md)

**Returned value**

diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md
index 9fda491ac50..a455d0af91b 100644
--- a/docs/en/sql-reference/functions/distance-functions.md
+++ b/docs/en/sql-reference/functions/distance-functions.md
@@ -20,11 +20,11 @@ Alias: `normL1`.

**Arguments**

-- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).

**Returned value**

-- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
+- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md).

**Examples**

@@ -56,11 +56,11 @@ Alias: `normL2`.

**Arguments**

-- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).

**Returned value**

-- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). [Float](../../sql-reference/data-types/float.md).
+- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). [Float](../data-types/float.md).

**Example**

@@ -91,11 +91,11 @@ Alias: `normL2Squared`.

**Arguments**

-- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- L2-norm squared. [Float](../../sql-reference/data-types/float.md). +- L2-norm squared. [Float](../data-types/float.md). **Example** @@ -127,11 +127,11 @@ Alias: `normLinf`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Linf-norm or the maximum absolute value. [Float](../../sql-reference/data-types/float.md). +- Linf-norm or the maximum absolute value. [Float](../data-types/float.md). **Example** @@ -163,12 +163,12 @@ Alias: `normLp`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md). **Returned value** -- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm). [Float](../../sql-reference/data-types/float.md). +- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm). [Float](../data-types/float.md). **Example** @@ -200,12 +200,12 @@ Alias: `distanceL1`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- 1-norm distance. [Float](../../sql-reference/data-types/float.md). +- 1-norm distance. [Float](../data-types/float.md). **Example** @@ -237,12 +237,12 @@ Alias: `distanceL2`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- 2-norm distance. [Float](../../sql-reference/data-types/float.md). +- 2-norm distance. [Float](../data-types/float.md). **Example** @@ -274,12 +274,12 @@ Alias: `distanceL2Squared`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Sum of the squares of the difference between the corresponding elements of two vectors. [Float](../../sql-reference/data-types/float.md). 
+- Sum of the squares of the difference between the corresponding elements of two vectors. [Float](../data-types/float.md).

**Example**

@@ -311,12 +311,12 @@ Alias: `distanceLinf`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector1` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).
+- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).

**Returned value**

-- Infinity-norm distance. [Float](../../sql-reference/data-types/float.md).
+- Infinity-norm distance. [Float](../data-types/float.md).

**Example**

@@ -348,13 +348,13 @@ Alias: `distanceLp`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).
+- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).
+- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md).

**Returned value**

-- p-norm distance. [Float](../../sql-reference/data-types/float.md).
+- p-norm distance. [Float](../data-types/float.md).

**Example**

@@ -387,11 +387,11 @@ Alias: `normalizeL1`.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../data-types/tuple.md).

**Returned value**

-- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
+- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md).

**Example**

@@ -423,11 +423,11 @@ Alias: `normalizeL1`.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../data-types/tuple.md).

**Returned value**

-- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
+- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md).

**Example**

@@ -459,11 +459,11 @@ Alias: `normalizeLinf `.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../data-types/tuple.md).

**Returned value**

-- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
+- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md).

**Example**

@@ -495,12 +495,12 @@ Alias: `normalizeLp `.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
-- `p` — The power. Possible values: any number from [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `tuple` — [Tuple](../data-types/tuple.md).
+- `p` — The power. Possible values: any number from [1;inf). [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md).

**Returned value**

-- Unit vector.
[Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -530,12 +530,12 @@ cosineDistance(vector1, vector2) **Arguments** -- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First tuple. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second tuple. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Cosine of the angle between two vectors subtracted from one. [Float](../../sql-reference/data-types/float.md). +- Cosine of the angle between two vectors subtracted from one. [Float](../data-types/float.md). **Examples** diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index bc64fdea427..408b605727d 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -18,7 +18,7 @@ char(number_1, [number_2, ..., number_n]); **Arguments** -- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). +- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../data-types/int-uint.md), [Float](../data-types/float.md). **Returned value** @@ -86,21 +86,21 @@ The function is using uppercase letters `A-F` and not using any prefixes (like ` For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if the leading digit is zero. -Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime). +Values of type [Date](../data-types/date.md) and [DateTime](../data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime). -For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted. +For [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted. -Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [Float](../data-types/float.md) and [Decimal](../data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. 
Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. **Arguments** -- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to hexadecimal. Types: [String](../data-types/string.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). **Returned value** -- A string with the hexadecimal representation of the argument. [String](../../sql-reference/data-types/string.md). +- A string with the hexadecimal representation of the argument. [String](../data-types/string.md). **Examples** @@ -181,13 +181,13 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). +- `arg` — A string containing any number of hexadecimal digits. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). **Returned value** -- A binary string (BLOB). [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../data-types/string.md). **Example** @@ -231,21 +231,21 @@ Alias: `BIN`. For integer arguments, it prints bin digits from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints eight digits of every byte if the leading digit is zero. -Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`). +Values of type [Date](../data-types/date.md) and [DateTime](../data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`). -For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted. +For [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted. -Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [Float](../data-types/float.md) and [Decimal](../data-types/decimal.md) types are encoded as their representation in memory. 
As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. **Arguments** -- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to binary. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), [Date](../data-types/date.md), or [DateTime](../data-types/datetime.md). **Returned value** -- A string with the binary representation of the argument. [String](../../sql-reference/data-types/string.md). +- A string with the binary representation of the argument. [String](../data-types/string.md). **Examples** @@ -330,11 +330,11 @@ Supports binary digits `0` and `1`. The number of binary digits does not have to **Arguments** -- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md). +- `arg` — A string containing any number of binary digits. [String](../data-types/string.md). **Returned value** -- A binary string (BLOB). [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../data-types/string.md). **Examples** @@ -386,11 +386,11 @@ bitPositionsToArray(arg) **Arguments** -- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `arg` — Integer value. [Int/UInt](../data-types/int-uint.md). **Returned value** -- An array containing a list of positions of bits that equal `1`, in ascending order. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- An array containing a list of positions of bits that equal `1`, in ascending order. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -442,11 +442,11 @@ mortonEncode(args) **Parameters** -- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. +- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type. **Returned value** -- A UInt64 code. [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../data-types/int-uint.md) **Example** @@ -463,7 +463,7 @@ Result: ### Expanded mode -Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. +Accepts a range mask ([tuple](../data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../data-types/int-uint.md) as other arguments. Each number in the mask configures the amount of range expansion:
1 - no expansion
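To make the mask concrete before the formal parameters below, here is a minimal hedged sketch; the inputs are arbitrary and the resulting codes are simply whatever the expansion produces, not documented constants:

```sql
-- Plain interleave vs. the same values with the second argument expanded 2x
SELECT
    mortonEncode(1024, 16)         AS plain_code,
    mortonEncode((1, 2), 1024, 16) AS expanded_code;
```

Expanding the narrower argument lets both inputs contribute a comparable number of bits to the code, which keeps range scans over the curve tighter.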
@@ -480,13 +480,13 @@ mortonEncode(range_mask, args) **Parameters** - `range_mask`: 1-8. -- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. +- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type. Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. **Returned value** -- A UInt64 code. [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../data-types/int-uint.md) **Example** @@ -579,7 +579,7 @@ Result: **implementation details** -Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. +Please note that you can fit only so many bits of information into Morton code as [UInt64](../data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. ## mortonDecode @@ -601,11 +601,11 @@ mortonDecode(tuple_size, code) **Parameters** - `tuple_size`: integer value no more than 8. -- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code. +- `code`: [UInt64](../data-types/int-uint.md) code. **Returned value** -- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. [UInt64](../../sql-reference/data-types/int-uint.md) +- [tuple](../data-types/tuple.md) of the specified size. [UInt64](../data-types/int-uint.md) **Example** diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 00c9ef376d3..5d82e26eb32 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -30,15 +30,15 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text that need to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../data-types/string.md#string). +- `plaintext` — Text that need to be encrypted. [String](../data-types/string.md#string). +- `key` — Encryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../data-types/string.md#string). +- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../data-types/string.md#string). **Returned value** -- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../data-types/string.md#string). 
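Before the worked examples that follow, a self-contained round trip may help; the key and IV literals here are placeholders sized for `aes-256-gcm` (32-byte key, 12-byte IV) and are not values to reuse:

```sql
-- Encrypt, then decrypt with the same key and IV; yields the original plaintext
WITH
    '12345678901234567890123456789012' AS key, -- 32 bytes, as aes-256-gcm requires
    'unique_nonce'                     AS iv   -- 12 bytes; must be unique per message
SELECT decrypt('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', key, iv), key, iv) AS round_trip;
```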
**Examples** @@ -123,14 +123,14 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../data-types/string.md#string). +- `plaintext` — Text that needs to be encrypted. [String](../data-types/string.md#string). +- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../data-types/string.md#string). **Returned value** -- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../data-types/string.md#string). **Examples** @@ -230,15 +230,15 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Arguments** -- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). -- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Decryption mode. [String](../data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../data-types/string.md#string). +- `key` — Decryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../data-types/string.md#string). +- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../data-types/string.md#string). **Returned value** -- Decrypted String. [String](../../sql-reference/data-types/string.md#string). +- Decrypted String. [String](../data-types/string.md#string). **Examples** @@ -361,14 +361,14 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Arguments** -- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). -- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Decryption mode. [String](../data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../data-types/string.md#string). +- `key` — Decryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Optional. [String](../data-types/string.md#string). 
**Returned value** -- Decrypted String. [String](../../sql-reference/data-types/string.md#string). +- Decrypted String. [String](../data-types/string.md#string). **Examples** diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 41657aafbbe..82c21ce40c8 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -25,9 +25,9 @@ dictGetOrNull('dict_name', attr_name, id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. -- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. +- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. **Returned value** @@ -239,7 +239,7 @@ dictHas('dict_name', id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. **Returned value** @@ -259,11 +259,11 @@ dictGetHierarchy('dict_name', key) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned value** -- Parents for the key. 
[Array(UInt64)](../../sql-reference/data-types/array.md). +- Parents for the key. [Array(UInt64)](../data-types/array.md). ## dictIsIn @@ -276,8 +276,8 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. +- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned value** @@ -297,11 +297,11 @@ dictGetChildren(dict_name, key) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned values** -- First-level descendants for the key. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- First-level descendants for the key. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -344,12 +344,12 @@ dictGetDescendants(dict_name, key, level) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. +- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../data-types/int-uint.md). **Returned values** -- Descendants for the key. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- Descendants for the key. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -409,8 +409,8 @@ dictGetAll('dict_name', attr_names, id_expr[, limit]) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. 
[Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. - `limit` - Maximum length for each value array returned. When truncating, child nodes are given precedence over parent nodes, and otherwise the defined list order for the regexp tree dictionary is respected. If unspecified, array length is unlimited. **Returned value** @@ -499,7 +499,7 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md) or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. - `default_value_expr` — Value returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute. **Returned value** diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index d62cd1db88d..ac9e21cd416 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -19,7 +19,7 @@ file(path[, default]) **Arguments** - `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. -- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). +- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 90520145b9d..a0dfbebc8ae 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -351,7 +351,7 @@ Result: ## assumeNotNull -Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. 
If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. +Returns the corresponding non-`Nullable` value for a value of [Nullable](../data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. ``` sql assumeNotNull(x) diff --git a/docs/en/sql-reference/functions/geo/coordinates.md b/docs/en/sql-reference/functions/geo/coordinates.md index 1cbc1933206..d10573b8995 100644 --- a/docs/en/sql-reference/functions/geo/coordinates.md +++ b/docs/en/sql-reference/functions/geo/coordinates.md @@ -152,8 +152,8 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...) **Input values** -- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers. -- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. +- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../data-types/tuple.md) — A tuple of two numbers. +- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. - The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons. **Returned values** diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index 80c55650b9c..8abc8006e5d 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -74,11 +74,11 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi **Arguments** -- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. [Float](../../../sql-reference/data-types/float.md). -- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. [Float](../../../sql-reference/data-types/float.md). -- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. [Float](../../../sql-reference/data-types/float.md). -- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. [Float](../../../sql-reference/data-types/float.md). -- `precision` — Geohash precision. Range: `[1, 12]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. [Float](../../data-types/float.md). +- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. [Float](../../data-types/float.md). +- `precision` — Geohash precision. Range: `[1, 12]`. [UInt8](../../data-types/int-uint.md). :::note All coordinate parameters must be of the same type: either `Float32` or `Float64`. 
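As a hedged illustration of that constraint, the sketch below passes all four coordinates as `Float64` literals; the bounding box and precision are arbitrary choices:

```sql
-- Geohash strings of precision 4 covering a small Aegean bounding box
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS boxes;
```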
@@ -86,7 +86,7 @@ All coordinate parameters must be of the same type: either `Float32` or `Float64 **Returned values** -- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). +- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. [Array](../../data-types/array.md)([String](../../data-types/string.md)). - `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values. :::note diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 7faff8288b3..bcdd457964a 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -26,12 +26,12 @@ h3IsValid(h3index) **Parameter** -- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- 1 — The number is a valid H3 index. [UInt8](../../../sql-reference/data-types/int-uint.md). -- 0 — The number is not a valid H3 index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The number is a valid H3 index. [UInt8](../../data-types/int-uint.md). +- 0 — The number is not a valid H3 index. [UInt8](../../data-types/int-uint.md). **Example** @@ -61,12 +61,12 @@ h3GetResolution(h3index) **Parameter** -- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). -- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). +- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. [UInt8](../../data-types/int-uint.md). **Example** @@ -96,11 +96,11 @@ h3EdgeAngle(resolution) **Parameter** -- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../data-types/float.md). **Example** @@ -130,11 +130,11 @@ h3EdgeLengthM(resolution) **Parameter** -- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../data-types/float.md). **Example** @@ -164,11 +164,11 @@ h3EdgeLengthKm(resolution) **Parameter** -- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). 
Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../data-types/float.md). **Example** @@ -198,14 +198,14 @@ geoToH3(lon, lat, resolution) **Arguments** -- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `lon` — Longitude. [Float64](../../data-types/float.md). +- `lat` — Latitude. [Float64](../../data-types/float.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- 0 in case of error. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. [UInt64](../../data-types/int-uint.md). +- 0 in case of error. [UInt64](../../data-types/int-uint.md). **Example** @@ -235,11 +235,11 @@ h3ToGeo(h3Index) **Arguments** -- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../data-types/float.md). `lat` — Latitude. [Float64](../../data-types/float.md). **Example** @@ -269,11 +269,11 @@ h3ToGeoBoundary(h3Index) **Arguments** -- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array of pairs '(lon, lat)'. [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../data-types/array.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). **Example** @@ -304,12 +304,12 @@ h3kRing(h3index, k) **Arguments** -- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Radius. [integer](../../../sql-reference/data-types/int-uint.md) +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `k` — Radius. [integer](../../data-types/int-uint.md) **Returned values** -- Array of H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -345,11 +345,11 @@ h3GetBaseCell(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Hexagon base cell number. [UInt8](../../../sql-reference/data-types/int-uint.md). +- Hexagon base cell number. [UInt8](../../data-types/int-uint.md). **Example** @@ -379,11 +379,11 @@ h3HexAreaM2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). 
+- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Area in square meters. [Float64](../../../sql-reference/data-types/float.md). +- Area in square meters. [Float64](../../data-types/float.md). **Example** @@ -413,11 +413,11 @@ h3HexAreaKm2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Area in square kilometers. [Float64](../../../sql-reference/data-types/float.md). +- Area in square kilometers. [Float64](../../data-types/float.md). **Example** @@ -447,13 +447,13 @@ h3IndexesAreNeighbors(index1, index2) **Arguments** -- `index1` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `index2` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index1` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `index2` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Indexes are neighbours. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — Indexes are not neighbours. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Indexes are neighbours. [UInt8](../../data-types/int-uint.md). +- `0` — Indexes are not neighbours. [UInt8](../../data-types/int-uint.md). **Example** @@ -483,12 +483,12 @@ h3ToChildren(index, resolution) **Arguments** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- Array of the child H3-indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of the child H3-indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -518,12 +518,12 @@ h3ToParent(index, resolution) **Arguments** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Parent H3 index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Parent H3 index. [UInt64](../../data-types/int-uint.md). **Example** @@ -551,11 +551,11 @@ h3ToString(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- String representation of the H3 index. [String](../../../sql-reference/data-types/string.md). +- String representation of the H3 index. [String](../../data-types/string.md). **Example** @@ -585,11 +585,11 @@ stringToH3(index_str) **Parameter** -- `index_str` — String representation of the H3 index. [String](../../../sql-reference/data-types/string.md). +- `index_str` — String representation of the H3 index. [String](../../data-types/string.md). **Returned value** -- Hexagon index number. 
Returns 0 on error. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. Returns 0 on error. [UInt64](../../data-types/int-uint.md). **Example** @@ -619,11 +619,11 @@ h3GetResolution(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Example** @@ -653,12 +653,12 @@ h3IsResClassIII(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Index has a resolution with Class III orientation. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — Index doesn't have a resolution with Class III orientation. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index has a resolution with Class III orientation. [UInt8](../../data-types/int-uint.md). +- `0` — Index doesn't have a resolution with Class III orientation. [UInt8](../../data-types/int-uint.md). **Example** @@ -688,12 +688,12 @@ h3IsPentagon(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Index represents a pentagonal cell. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — Index doesn't represent a pentagonal cell. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index represents a pentagonal cell. [UInt8](../../data-types/int-uint.md). +- `0` — Index doesn't represent a pentagonal cell. [UInt8](../../data-types/int-uint.md). **Example** @@ -723,11 +723,11 @@ h3GetFaces(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array containing icosahedron faces intersected by a given H3 index. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array containing icosahedron faces intersected by a given H3 index. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -757,11 +757,11 @@ h3CellAreaM2(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Cell area in square meters. [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square meters. [Float64](../../data-types/float.md). **Example** @@ -791,11 +791,11 @@ h3CellAreaRads2(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Cell area in square radians. [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square radians. [Float64](../../data-types/float.md). **Example** @@ -825,12 +825,12 @@ h3ToCenterChild(index, resolution) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. 
Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../../sql-reference/data-types/int-uint.md). +- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../data-types/int-uint.md). **Example** @@ -860,11 +860,11 @@ h3ExactEdgeLengthM(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in meters. [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in meters. [Float64](../../data-types/float.md). **Example** @@ -894,11 +894,11 @@ h3ExactEdgeLengthKm(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in kilometers. [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in kilometers. [Float64](../../data-types/float.md). **Example** @@ -928,11 +928,11 @@ h3ExactEdgeLengthRads(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in radians. [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in radians. [Float64](../../data-types/float.md). **Example** @@ -962,11 +962,11 @@ h3NumHexagons(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Number of H3 indices. [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of H3 indices. [Int64](../../data-types/int-uint.md). **Example** @@ -996,12 +996,12 @@ h3PointDistM(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in meters.[Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in meters.[Float64](../../data-types/float.md). **Example** @@ -1031,12 +1031,12 @@ h3PointDistKm(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). 
**Returned values** -- Haversine or great circle distance in kilometers. [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in kilometers. [Float64](../../data-types/float.md). **Example** @@ -1066,12 +1066,12 @@ h3PointDistRads(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in radians. [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in radians. [Float64](../../data-types/float.md). **Example** @@ -1101,7 +1101,7 @@ h3GetRes0Indexes() **Returned values** -- Array of all the resolution 0 H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all the resolution 0 H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1132,11 +1132,11 @@ h3GetPentagonIndexes(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Array of all pentagon H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all pentagon H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1166,12 +1166,12 @@ h3Line(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../data-types/int-uint.md). **Returned value** -Array of h3 indexes representing the line of indices between the two provided indices. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing the line of indices between the two provided indices. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1201,12 +1201,12 @@ h3Distance(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../data-types/int-uint.md). **Returned value** -- Number of grid cells. [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of grid cells. [Int64](../../data-types/int-uint.md). Returns a negative number if finding the distance fails. 
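A short sketch tying this back to `geoToH3` from earlier on this page; both points are arbitrary coordinates, and the resulting cell count depends on the resolution chosen:

```sql
-- Grid distance in cells between two resolution-8 hexagons
WITH
    geoToH3(37.79506683, 55.71290588, 8) AS start,
    geoToH3(37.63098076, 55.77922738, 8) AS finish
SELECT h3Distance(start, finish) AS cells_between;
```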
@@ -1240,12 +1240,12 @@ h3HexRing(index, k) **Parameter** -- `index` — Hexagon index number that represents the origin. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Distance. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents the origin. [UInt64](../../data-types/int-uint.md). +- `k` — Distance. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array of H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1275,12 +1275,12 @@ h3GetUnidirectionalEdge(originIndex, destinationIndex) **Parameter** -- `originIndex` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destinationIndex` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `originIndex` — Origin Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Unidirectional Edge Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Unidirectional Edge Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1310,12 +1310,12 @@ h3UnidirectionalEdgeisValid(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- 1 — The H3 index is a valid unidirectional edge. [UInt8](../../../sql-reference/data-types/int-uint.md). -- 0 — The H3 index is not a valid unidirectional edge. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The H3 index is a valid unidirectional edge. [UInt8](../../data-types/int-uint.md). +- 0 — The H3 index is not a valid unidirectional edge. [UInt8](../../data-types/int-uint.md). **Example** @@ -1345,11 +1345,11 @@ h3GetOriginIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Origin Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Origin Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1379,11 +1379,11 @@ h3GetDestinationIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Destination Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Destination Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1413,14 +1413,14 @@ h3GetIndexesFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). 
**Returned value** A tuple consisting of two values `tuple(origin,destination)`: -- `origin` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destination` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `origin` — Origin Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `destination` — Destination Hexagon index number. [UInt64](../../data-types/int-uint.md). Returns `(0,0)` if the provided input is not valid. @@ -1452,11 +1452,11 @@ h3GetUnidirectionalEdgesFromHexagon(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -Array of h3 indexes representing each unidirectional edge. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing each unidirectional edge. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1486,11 +1486,11 @@ h3GetUnidirectionalEdgeBoundary(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Array of pairs '(lon, lat)'. [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../data-types/array.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). **Example** diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index 2158ef2d57d..3165b21318b 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -21,12 +21,12 @@ geoToS2(lon, lat) **Arguments** -- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- `lon` — Longitude. [Float64](../../data-types/float.md). +- `lat` — Latitude. [Float64](../../data-types/float.md). **Returned values** -- S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- S2 point index. [UInt64](../../data-types/int-uint.md). **Example** @@ -56,13 +56,13 @@ s2ToGeo(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../data-types/int-uint.md). **Returned values** - A [tuple](../../data-types/tuple.md) consisting of two values: - - `lon`. [Float64](../../../sql-reference/data-types/float.md). - - `lat`. [Float64](../../../sql-reference/data-types/float.md). + - `lon`. [Float64](../../data-types/float.md). + - `lat`. [Float64](../../data-types/float.md). **Example** @@ -92,11 +92,11 @@ s2GetNeighbors(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../data-types/int-uint.md). **Returned value** -- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). 
+- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -126,12 +126,12 @@ s2CellsIntersect(s2index1, s2index2) **Arguments** -- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — If the cells intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — If the cells don't intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cells intersect. [UInt8](../../data-types/int-uint.md). +- `0` — If the cells don't intersect. [UInt8](../../data-types/int-uint.md). **Example** @@ -161,14 +161,14 @@ s2CapContains(center, degrees, point) **Arguments** -- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `center` — S2 point index corresponding to the cap. [UInt64](../../data-types/int-uint.md). +- `degrees` — Radius of the cap in degrees. [Float64](../../data-types/float.md). +- `point` — S2 point index. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — If the cap contains the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — If the cap doesn't contain the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cap contains the S2 point index. [UInt8](../../data-types/int-uint.md). +- `0` — If the cap doesn't contain the S2 point index. [UInt8](../../data-types/int-uint.md). **Example** @@ -198,13 +198,13 @@ s2CapUnion(center1, radius1, center2, radius2) **Arguments** -- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../data-types/int-uint.md). +- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../data-types/float.md). **Returned values** -- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius` — Radius of the smallest cap containing the two input caps. [Float64](../../../sql-reference/data-types/float.md). +- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. [UInt64](../../data-types/int-uint.md). +- `radius` — Radius of the smallest cap containing the two input caps. [Float64](../../data-types/float.md). **Example** @@ -234,14 +234,14 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. 
[UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. [UInt64](../../../sql-reference/data-types/float.md). +- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. [UInt64](../../data-types/float.md). **Example** @@ -271,9 +271,9 @@ s2RectContains(s2PointLow, s2PointHi, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Point` — Target S2 point index. [UInt64](../../data-types/int-uint.md). **Returned value** @@ -308,13 +308,13 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi) **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. [UInt64](../../data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. [UInt64](../../data-types/int-uint.md). **Example** @@ -344,13 +344,13 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. 
[UInt64](../../data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e3968a691a8..506114038f7 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -12,7 +12,7 @@ Simhash is a hash function, which returns close hash values for close (similar) ## halfMD5 -[Interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. +[Interprets](../functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. ```sql halfMD5(par1, ...) @@ -23,11 +23,11 @@ Consider using the [sipHash64](#siphash64) function instead. **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -61,7 +61,7 @@ sipHash64(par1,...) This is a cryptographic hash function. It works at least three times faster than the [MD5](#md5) hash function. -The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. 
It then combines the hashes by the following algorithm: +The function [interprets](../functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: 1. The first and the second hash value are concatenated to an array which is hashed. 2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. @@ -69,11 +69,11 @@ The function [interprets](/docs/en/sql-reference/functions/type-conversion-funct **Arguments** -The function takes a variable number of input parameters of any of the [supported data types](/docs/en/sql-reference/data-types/index.md). +The function takes a variable number of input parameters of any of the [supported data types](../data-types/index.md). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. Note that the calculated hash values may be equal for the same input values of different argument types. This affects for example integer types of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data. @@ -105,7 +105,7 @@ Same as [sipHash64](#siphash64), but the first argument is a tuple of two UInt64 **Returned value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -143,7 +143,7 @@ Same as for [sipHash64](#siphash64). **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -183,7 +183,7 @@ Same as [sipHash128](#siphash128), but the first argument is a tuple of two UInt **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -217,7 +217,7 @@ Same as for [sipHash128](#siphash128). **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -251,7 +251,7 @@ Same as [sipHash128Reference](#siphash128reference), but the first argument is a **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -283,11 +283,11 @@ Note that Google changed the algorithm of CityHash after it has been added to Cl **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). 
For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Examples** @@ -321,7 +321,7 @@ It works faster than intHash32. Average quality. ## SHA1, SHA224, SHA256, SHA512, SHA512_256 -Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). **Syntax** @@ -337,15 +337,15 @@ Even in these cases, we recommend applying the function offline and pre-calculat **Arguments** -- `s` — Input string for SHA hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- `s` — Input string for SHA hash calculation. [String](../data-types/string.md). **Returned value** -- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). [FixedString](../data-types/fixedstring.md). **Example** -Use the [hex](/docs/en/sql-reference/functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. +Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. Query: @@ -363,7 +363,7 @@ Result: ## BLAKE3 -Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). **Syntax** @@ -375,15 +375,15 @@ This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust **Arguments** -- s - input string for BLAKE3 hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- s - input string for BLAKE3 hash calculation. [String](../data-types/string.md). **Return value** -- BLAKE3 hash as a byte array with type FixedString(32). [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- BLAKE3 hash as a byte array with type FixedString(32). [FixedString](../data-types/fixedstring.md). **Example** -Use function [hex](/docs/en/sql-reference/functions/encoding-functions.md/#hex) to represent the result as a hex-encoded string. +Use function [hex](../functions/encoding-functions.md/#hex) to represent the result as a hex-encoded string. Query: ```sql @@ -419,11 +419,11 @@ These functions use the `Fingerprint64` and `Hash64` methods respectively from a **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). 
For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -564,11 +564,11 @@ metroHash64(par1, ...) **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -602,12 +602,12 @@ Alias: `yandexConsistentHash` (left for backwards compatibility sake). **Parameters** -- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md). -- `n`: Number of buckets. [UInt16](/docs/en/sql-reference/data-types/int-uint.md). +- `input`: A UInt64-type key [UInt64](../data-types/int-uint.md). +- `n`: Number of buckets. [UInt16](../data-types/int-uint.md). **Returned value** -- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- A [UInt16](../data-types/int-uint.md) data type hash value. **Implementation details** @@ -638,12 +638,12 @@ murmurHash2_64(par1, ...) **Arguments** -Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). 
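
A minimal sketch of the caveat above, assuming (as the description states) that a named and an unnamed `Tuple` carrying the same data hash identically; the values are illustrative:

```sql
-- Sketch: per the caveat above, these two calls are expected to agree,
-- because the named and the unnamed Tuple carry the same data.
SELECT
    murmurHash2_64(tuple(1, 'a')) = murmurHash2_64(CAST((1, 'a') AS Tuple(id UInt8, s String))) AS hashes_match;
```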
**Returned Value** -- The `murmurHash2_32` function returns hash value having the [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type. -- The `murmurHash2_64` function returns hash value having the [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type. +- The `murmurHash2_32` function returns hash value having the [UInt32](../data-types/int-uint.md) data type. +- The `murmurHash2_64` function returns hash value having the [UInt64](../data-types/int-uint.md) data type. **Example** @@ -669,11 +669,11 @@ gccMurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](../data-types/index.md/#data_types). **Returned value** -- Calculated hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -706,11 +706,11 @@ MurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](../data-types/index.md/#data_types). **Returned value** -- Calculated hash value. [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt32](../data-types/int-uint.md). **Example** @@ -741,12 +741,12 @@ murmurHash3_64(par1, ...) **Arguments** -Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -- The `murmurHash3_32` function returns a [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. -- The `murmurHash3_64` function returns a [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- The `murmurHash3_32` function returns a [UInt32](../data-types/int-uint.md) data type hash value. +- The `murmurHash3_64` function returns a [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -772,11 +772,11 @@ murmurHash3_128(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions). [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — A list of [expressions](../syntax.md/#syntax-expressions). [String](../data-types/string.md). **Returned value** -A 128-bit `MurmurHash3` hash value. [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `MurmurHash3` hash value. [FixedString(16)](../data-types/fixedstring.md). 
**Example** @@ -806,11 +806,11 @@ xxh3(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions) of any data type. +- `expr` — A list of [expressions](../syntax.md/#syntax-expressions) of any data type. **Returned value** -A 64-bit `xxh3` hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +A 64-bit `xxh3` hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -872,7 +872,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -882,12 +882,12 @@ ngramSimHash(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -909,7 +909,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -919,12 +919,12 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -946,7 +946,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive. 
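
A minimal sketch of the Hamming-distance comparison the simhash family is built for; the strings and the 6-bit threshold below are illustrative assumptions, not documented values:

```sql
-- Sketch: the smaller the bitHammingDistance between two simhashes,
-- the more similar the two strings are likely to be.
SELECT bitHammingDistance(
    ngramSimHash('ClickHouse is a column-oriented database'),
    ngramSimHash('ClickHouse is a columnar database')
) <= 6 AS probably_similar;
```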
-Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -956,12 +956,12 @@ ngramSimHashUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -983,7 +983,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -993,12 +993,12 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1020,7 +1020,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). 
The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1030,12 +1030,12 @@ wordShingleSimHash(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1057,7 +1057,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1067,12 +1067,12 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1094,7 +1094,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1104,12 +1104,12 @@ wordShingleSimHashUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1131,7 +1131,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1141,12 +1141,12 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1176,11 +1176,11 @@ wyHash64(string) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `string` — String. [String](../data-types/string.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1202,7 +1202,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1212,13 +1212,13 @@ ngramMinHash(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
-- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1240,7 +1240,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1250,13 +1250,13 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1278,7 +1278,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. 
Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1288,13 +1288,13 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1316,7 +1316,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1326,13 +1326,13 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. 
Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1362,13 +1362,13 @@ ngramMinHashArg(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1398,13 +1398,13 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). 
+- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1434,13 +1434,13 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1470,13 +1470,13 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1498,7 +1498,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. 
Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1508,13 +1508,13 @@ wordShingleMinHash(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1536,7 +1536,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1546,13 +1546,13 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
+- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1574,7 +1574,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1584,13 +1584,13 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1612,7 +1612,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. 
-Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1622,13 +1622,13 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1658,13 +1658,13 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). 
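
A minimal sketch of the tuple comparison these MinHash functions describe; the strings are illustrative, and the threshold follows from the rule above (a Hamming distance below 2 means at least one of the two hashes coincides):

```sql
-- Sketch: tupleHammingDistance counts differing tuple components,
-- so a value < 2 means the minimum or the maximum hash matches.
WITH
    wordShingleMinHash('ClickHouse is a column-oriented database') AS h1,
    wordShingleMinHash('ClickHouse is a column oriented database') AS h2
SELECT tupleHammingDistance(h1, h2) < 2 AS probably_duplicates;
```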
**Example** @@ -1694,13 +1694,13 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1730,13 +1730,13 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1766,13 +1766,13 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
-- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1810,7 +1810,7 @@ Alias: `sqid` **Returned Value** -A sqid [String](/docs/en/sql-reference/data-types/string.md). +A sqid [String](../data-types/string.md). **Example** @@ -1837,11 +1837,11 @@ sqidDecode(sqid) **Arguments** -- A sqid - [String](/docs/en/sql-reference/data-types/string.md) +- A sqid - [String](../data-types/string.md) **Returned Value** -The sqid transformed to numbers [Array(UInt64)](/docs/en/sql-reference/data-types/array.md). +The sqid transformed to numbers [Array(UInt64)](../data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index d07a5292431..c0256ba4735 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -11,7 +11,7 @@ There are at least\* two types of functions - regular functions (they are just c In this section we discuss regular functions. For aggregate functions, see the section “Aggregate functions”. :::note -There is a third type of function that the [‘arrayJoin’ function](/docs/en/sql-reference/functions/array-join.md) belongs to. And [table functions](/docs/en/sql-reference/table-functions/index.md) can also be mentioned separately. +There is a third type of function that the [‘arrayJoin’ function](../functions/array-join.md) belongs to. And [table functions](../table-functions/index.md) can also be mentioned separately. ::: ## Strong Typing @@ -63,4 +63,4 @@ For some functions the first argument (the lambda function) can be omitted. In t ## User Defined Functions (UDFs) -ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md). +ClickHouse supports user-defined functions. See [UDFs](../functions/udf.md). diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index be8a2956d41..540e148e3f1 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -36,14 +36,14 @@ addressToLine(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. 
+- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process.

**Returned value**

-- Source code filename and the line number in this file delimited by colon. [String](../../sql-reference/data-types/string.md).
+- Source code filename and the line number in this file delimited by colon. [String](../data-types/string.md).

- For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number.

-- Name of a binary, if the function couldn’t find the debug information. [String](../../sql-reference/data-types/string.md).
-- Empty string, if the address is not valid. [String](../../sql-reference/data-types/string.md).
+- Name of a binary, if the function couldn’t find the debug information. [String](../data-types/string.md).
+- Empty string, if the address is not valid. [String](../data-types/string.md).

**Example**

@@ -124,7 +124,7 @@ addressToLineWithInlines(address_of_binary_instruction)

**Arguments**

-- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
+- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process.

**Returned value**

- Array with single element which is name of a binary, if the function couldn’t find the debug information.

-- Empty array, if the address is not valid. [Array(String)](../../sql-reference/data-types/array.md).
+- Empty array, if the address is not valid. [Array(String)](../data-types/array.md).

**Example**

@@ -225,12 +225,12 @@ addressToSymbol(address_of_binary_instruction)

**Arguments**

-- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
+- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process.

**Returned value**

-- Symbol from ClickHouse object files. [String](../../sql-reference/data-types/string.md).
-- Empty string, if the address is not valid. [String](../../sql-reference/data-types/string.md).
+- Symbol from ClickHouse object files. [String](../data-types/string.md).
+- Empty string, if the address is not valid. [String](../data-types/string.md).

**Example**

@@ -320,12 +320,12 @@ demangle(symbol)

**Arguments**

-- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file.
+- `symbol` ([String](../data-types/string.md)) — Symbol from an object file.

**Returned value**

-- Name of the C++ function. [String](../../sql-reference/data-types/string.md).
-- Empty string if a symbol is not valid. [String](../../sql-reference/data-types/string.md).
+- Name of the C++ function. [String](../data-types/string.md).
+- Empty string if a symbol is not valid. [String](../data-types/string.md).

**Example**

@@ -414,7 +414,7 @@ tid()

**Returned value**

-- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges).
+- Current thread id. [UInt64](../data-types/int-uint.md#uint-ranges).

**Example**

@@ -444,7 +444,7 @@ logTrace('message')

**Arguments**

-- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string).
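The introspection functions above are usually chained together over `system.trace_log`; a minimal sketch, assuming introspection is enabled and debug symbols are installed on the server:

``` sql
SET allow_introspection_functions = 1;

-- Resolve one sampled stack frame to a demangled symbol and a source location.
SELECT
    addr,
    demangle(addressToSymbol(addr)) AS symbol,
    addressToLine(addr) AS source_line
FROM (SELECT arrayJoin(trace) AS addr FROM system.trace_log LIMIT 1);
```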
**Returned value** diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 21beffbd0a8..5b6a3aef2c8 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -147,11 +147,11 @@ IPv6StringToNum(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- IPv6 address in binary format. [FixedString(16)](../../sql-reference/data-types/fixedstring.md). +- IPv6 address in binary format. [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -246,7 +246,7 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32); ## toIPv4(string) -An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../../sql-reference/data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. +An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. ``` sql WITH @@ -294,7 +294,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null ## toIPv6 -Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. +Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format. If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned. @@ -307,11 +307,11 @@ toIPv6(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md) +- `string` — IP address. [String](../data-types/string.md) **Returned value** -- IP address. [IPv6](../../sql-reference/data-types/ipv6.md). +- IP address. [IPv6](../data-types/ipv6.md). **Examples** @@ -366,11 +366,11 @@ isIPv4String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- `1` if `string` is IPv4 address, `0` otherwise. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv4 address, `0` otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -402,11 +402,11 @@ isIPv6String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- `1` if `string` is IPv6 address, `0` otherwise. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv6 address, `0` otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -441,12 +441,12 @@ This function accepts both IPv4 and IPv6 addresses (and networks) represented as **Arguments** -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). -- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../../sql-reference/data-types/string.md). +- `address` — An IPv4 or IPv6 address. [String](../data-types/string.md). +- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../data-types/string.md). 
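A quick sketch of the conversion and validation helpers above; note that an IPv4 input to `toIPv6` comes back as an IPv4-mapped IPv6 address:

``` sql
SELECT
    toIPv6('127.0.0.1')         AS mapped,  -- ::ffff:127.0.0.1
    isIPv4String('127.0.0.1')   AS v4_ok,   -- 1
    isIPv6String('2001:db8::1') AS v6_ok;   -- 1
```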
**Returned value** -- `1` or `0`. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` or `0`. [UInt8](../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index dc4a3d871e7..8359d5f9fbc 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -31,7 +31,7 @@ simpleJSONHas(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -71,7 +71,7 @@ simpleJSONExtractUInt(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -118,7 +118,7 @@ simpleJSONExtractInt(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -165,7 +165,7 @@ simpleJSONExtractFloat(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -212,7 +212,7 @@ simpleJSONExtractBool(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -259,12 +259,12 @@ simpleJSONExtractRaw(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. +It returns the value of the field as a [`String`](../data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. **Example** @@ -306,12 +306,12 @@ simpleJSONExtractString(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. 
[String literal](../syntax#string) **Returned value** -It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. +It returns the value of a field as a [`String`](../data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. **Implementation details** @@ -528,12 +528,12 @@ JSONExtractKeys(json[, a, b, c...]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../../sql-reference/data-types/string.md) to get the field by the key or an [Integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../data-types/string.md) with valid JSON. +- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../data-types/string.md) to get the field by the key or an [Integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned value** -Array with the keys of the JSON. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Array with the keys of the JSON. [Array](../data-types/array.md)([String](../data-types/string.md)). **Example** @@ -588,13 +588,13 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../data-types/string.md) with valid JSON. +- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../data-types/string.md) to get the field by the key or an [integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned values** -- Array with `('key', 'value')` tuples. Both tuple members are strings. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). -- Empty array if the requested object does not exist, or input JSON is invalid. 
[Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)).
+- Array with `('key', 'value')` tuples. Both tuple members are strings. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md))).
+- Empty array if the requested object does not exist, or input JSON is invalid. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md))).

**Examples**

@@ -719,9 +719,9 @@ Before version 21.11 the order of arguments was wrong, i.e. JSON_VALUE(path, jso

## toJSONString

Serializes a value to its JSON representation. Various data types and nested structures are supported.
-64-bit [integers](../../sql-reference/data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior.
+64-bit [integers](../data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior.
Special values `NaN` and `inf` are replaced with `null`. Enable [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) setting to show them.
-When serializing an [Enum](../../sql-reference/data-types/enum.md) value, the function outputs its name.
+When serializing an [Enum](../data-types/enum.md) value, the function outputs its name.

**Syntax**

@@ -735,12 +735,12 @@ toJSONString(value)

**Returned value**

-- JSON representation of the value. [String](../../sql-reference/data-types/string.md).
+- JSON representation of the value. [String](../data-types/string.md).

**Example**

-The first example shows serialization of a [Map](../../sql-reference/data-types/map.md).
-The second example shows some special values wrapped into a [Tuple](../../sql-reference/data-types/tuple.md).
+The first example shows serialization of a [Map](../data-types/map.md).
+The second example shows some special values wrapped into a [Tuple](../data-types/tuple.md).

Query:

@@ -776,11 +776,11 @@ Alias: `JSON_ARRAY_LENGTH(json)`.

**Arguments**

-- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON.
+- `json` — [String](../data-types/string.md) with valid JSON.

**Returned value**

-- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md).
+- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. [Nullable(UInt64)](../data-types/int-uint.md).

**Example**

@@ -807,11 +807,11 @@ jsonMergePatch(json1, json2, ...)

**Arguments**

-- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON.
+- `json` — [String](../data-types/string.md) with valid JSON.

**Returned value**

-- If JSON object strings are valid, return the merged JSON object string. [String](../../sql-reference/data-types/string.md).
+- If JSON object strings are valid, return the merged JSON object string. [String](../data-types/string.md).
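A short sketch of the JSON helpers covered above: the `simpleJSON*` functions use a fast, restricted parser and need a constant field name, while `jsonMergePatch` merges objects left to right with later values winning:

``` sql
SELECT
    simpleJSONHas('{"foo":"true","qux":1}', 'foo')        AS has_foo,  -- 1
    JSONExtractKeys('{"a": "hello", "b": [-100, 200.0]}') AS keys,     -- ['a','b']
    jsonMergePatch('{"a":1,"b":2}', '{"b":3,"c":4}')      AS merged;   -- {"a":1,"b":3,"c":4}
```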
**Example** diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 1977c5c2a7e..8448dd4ff12 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -6,7 +6,7 @@ sidebar_label: Logical # Logical Functions -Below functions perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../../sql-reference/data-types/int-uint.md) or in some cases `NULL`. +Below functions perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../data-types/int-uint.md) or in some cases `NULL`. Zero as an argument is considered `false`, non-zero values are considered `true`. @@ -26,13 +26,13 @@ Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-ope **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** -- `0`, if at least one argument evaluates to `false`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `0`, if at least one argument evaluates to `false`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). - `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`. [NULL](../../sql-reference/syntax.md/#null). -- `1`, otherwise. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `1`, otherwise. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). **Example** @@ -78,7 +78,7 @@ Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-opera **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** @@ -86,7 +86,7 @@ Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-opera - `0`, if all arguments evaluate to `false`, - `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`. -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +Type: [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). **Example** @@ -130,12 +130,12 @@ Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-ne **Arguments** -- `val` — The value. 
[Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — The value. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** -- `1`, if `val` evaluates to `false`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). -- `0`, if `val` evaluates to `true`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `1`, if `val` evaluates to `false`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). +- `0`, if `val` evaluates to `true`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). - `NULL`, if `val` is `NULL`. [NULL](../../sql-reference/syntax.md/#null). **Example** @@ -164,12 +164,12 @@ xor(val1, val2...) **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** -- `1`, for two values: if one of the values evaluates to `false` and other does not. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). -- `0`, for two values: if both values evaluate to `false` or to both `true`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `1`, for two values: if one of the values evaluates to `false` and other does not. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). +- `0`, for two values: if both values evaluate to `false` or to both `true`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). - `NULL`, if at least one of the inputs is `NULL`. [NULL](../../sql-reference/syntax.md/#null). **Example** diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 03ddc38ef50..7f50fa933b6 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -18,7 +18,7 @@ e() **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## pi @@ -31,7 +31,7 @@ pi() ``` **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## exp @@ -45,11 +45,11 @@ exp(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). 
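The rules above reduce to a few probes; a minimal sketch (`NULL` propagates only when the result is not already decided by the other arguments):

``` sql
SELECT
    and(0, NULL) AS a,  -- 0: one false argument settles the conjunction
    or(0, NULL)  AS o,  -- NULL: nothing is true and a NULL is present
    not(1)       AS n,  -- 0
    xor(1, 0)    AS x;  -- 1
```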
+- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## log @@ -65,11 +65,11 @@ Alias: `ln(x)` **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## exp2 @@ -83,11 +83,11 @@ exp2(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## intExp2 @@ -111,11 +111,11 @@ log2(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## exp10 @@ -129,11 +129,11 @@ exp10(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## intExp10 @@ -157,11 +157,11 @@ log10(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## sqrt @@ -173,11 +173,11 @@ sqrt(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## cbrt @@ -189,11 +189,11 @@ cbrt(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). 
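A few spot checks for the exponential, logarithm, and root functions above (results are floating point, so small representation error is possible):

``` sql
SELECT
    log2(8)    AS l2,   -- 3
    log10(100) AS l10,  -- 2
    exp10(3)   AS e10,  -- ≈ 1000
    sqrt(16)   AS r,    -- 4
    cbrt(27)   AS c;    -- ≈ 3
```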
## erf @@ -207,11 +207,11 @@ erf(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). **Example** @@ -239,11 +239,11 @@ erfc(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## lgamma @@ -257,11 +257,11 @@ lgamma(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## tgamma @@ -275,11 +275,11 @@ gamma(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## sin @@ -293,11 +293,11 @@ sin(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). **Example** @@ -323,11 +323,11 @@ cos(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## tan @@ -341,11 +341,11 @@ tan(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## asin @@ -359,11 +359,11 @@ asin(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). 
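One classic use of `erf` from the section above: the probability that a normally distributed value lands within three standard deviations of its mean:

``` sql
SELECT erf(3 / sqrt(2));  -- ≈ 0.9973, the ±3σ coverage of a normal distribution
```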
+- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## acos @@ -377,11 +377,11 @@ acos(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## atan @@ -395,11 +395,11 @@ atan(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## pow @@ -415,12 +415,12 @@ Alias: `power(x, y)` **Arguments** -- `x` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) -- `y` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) +- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md) +- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md) **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## cosh @@ -434,13 +434,13 @@ cosh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `1 <= cosh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -468,13 +468,13 @@ acosh(x) **Arguments** -- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -502,13 +502,13 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. 
[(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-∞ < sinh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -536,13 +536,13 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -569,13 +569,13 @@ tanh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-1 < tanh(x) < 1`. -Type: [Float*](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float*](../data-types/float.md#float32-float64). **Example** @@ -601,13 +601,13 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -635,14 +635,14 @@ atan2(y, x) **Arguments** -- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). -- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). +- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). +- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** - The angle `θ` such that `−π < θ ≤ π`, in radians. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -670,14 +670,14 @@ hypot(x, y) **Arguments** -- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). 
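Spot checks for the power, two-argument, and hyperbolic functions above:

``` sql
SELECT
    pow(2, 10)  AS p,  -- 1024
    atan2(1, 1) AS q,  -- ≈ 0.7853981633974483, i.e. π/4
    tanh(0)     AS t;  -- 0
```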
-- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). +- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). +- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** - The length of the hypotenuse of a right-angle triangle. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -705,13 +705,13 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-∞ < log1p(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -747,7 +747,7 @@ sign(x) - 0 for `x = 0` - 1 for `x > 0` -Type: [Int8](../../sql-reference/data-types/int-uint.md). +Type: [Int8](../data-types/int-uint.md). **Examples** @@ -804,11 +804,11 @@ sigmoid(x) **Parameters** -- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Corresponding value along the sigmoid curve between 0 and 1. [Float64](../../sql-reference/data-types/float.md). +- Corresponding value along the sigmoid curve between 0 and 1. [Float64](../data-types/float.md). **Example** @@ -838,11 +838,11 @@ degrees(x) **Arguments** -- `x` — Input in radians. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Value in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- Value in degrees. [Float64](../data-types/float.md#float32-float64). **Example** @@ -870,13 +870,13 @@ radians(x) **Arguments** -- `x` — Input in degrees. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Input in degrees. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Value in radians. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). 
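And a one-line sanity check for the remaining helpers:

``` sql
SELECT
    hypot(3, 4)   AS h,  -- 5: hypotenuse of a 3-4-5 right triangle
    sign(-42)     AS s,  -- -1
    degrees(pi()) AS d;  -- 180
```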
**Example** diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 3e0458d226d..4bfa181a35f 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -23,7 +23,7 @@ stem('language', word) ### Arguments - `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). -- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string). +- `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string). ### Examples @@ -88,8 +88,8 @@ lemmatize('language', word) ### Arguments -- `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string). +- `language` — Language which rules will be applied. [String](../data-types/string.md#string). +- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). ### Examples @@ -139,8 +139,8 @@ synonyms('extension_name', word) ### Arguments -- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string). +- `extension_name` — Name of the extension in which search will be performed. [String](../data-types/string.md#string). +- `word` — Word that will be searched in extension. [String](../data-types/string.md#string). ### Examples @@ -188,7 +188,7 @@ detectLanguage('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -226,7 +226,7 @@ detectLanguageMixed('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -262,7 +262,7 @@ detectLanguageUnknown('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -302,7 +302,7 @@ detectCharset('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 45fc12388fe..dfe1224f7b8 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -33,11 +33,11 @@ getMacro(name); **Arguments** -- `name` — Macro name to retrieve from the `` section. 
[String](../../sql-reference/data-types/string.md#string). +- `name` — Macro name to retrieve from the `` section. [String](../data-types/string.md#string). **Returned value** -- Value of the specified macro. [String](../../sql-reference/data-types/string.md). +- Value of the specified macro. [String](../data-types/string.md). **Example** @@ -116,7 +116,7 @@ basename(expr) **Arguments** -- `expr` — A value of type [String](../../sql-reference/data-types/string.md). Backslashes must be escaped. +- `expr` — A value of type [String](../data-types/string.md). Backslashes must be escaped. **Returned Value** @@ -237,11 +237,11 @@ byteSize(argument [, ...]) **Returned value** -- Estimation of byte size of the arguments in memory. [UInt64](../../sql-reference/data-types/int-uint.md). +- Estimation of byte size of the arguments in memory. [UInt64](../data-types/int-uint.md). **Examples** -For [String](../../sql-reference/data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). +For [String](../data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). Query: @@ -350,7 +350,7 @@ sleep(seconds) **Arguments** -- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../data-types/int-uint.md) or [Float](../data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** @@ -400,7 +400,7 @@ sleepEachRow(seconds) **Arguments** -- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../data-types/int-uint.md) or [Float*](../data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** @@ -494,8 +494,8 @@ isConstant(x) **Returned values** -- `1` if `x` is constant. [UInt8](../../sql-reference/data-types/int-uint.md). -- `0` if `x` is non-constant. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `x` is constant. [UInt8](../data-types/int-uint.md). +- `0` if `x` is non-constant. [UInt8](../data-types/int-uint.md). **Examples** @@ -963,7 +963,7 @@ uptime() **Returned value** -- Time value of seconds. [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Time value of seconds. [UInt32](../data-types/int-uint.md). **Example** @@ -1226,7 +1226,7 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st **Arguments** - `column` — A column name or scalar expression. -- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../../sql-reference/data-types/int-uint.md). +- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../data-types/int-uint.md). - `default_value` — Optional. The returned value if offset is beyond the block boundaries. Type of data blocks affected. 
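Quick illustrations of `byteSize` and `isConstant` as described above; the byte count follows the String layout noted earlier (length plus 9 bytes):

``` sql
SELECT byteSize('string');                       -- 15: 6 data bytes + 9 bytes of overhead
SELECT isConstant(x + 1) FROM (SELECT 43 AS x);  -- 1: x is a constant within the block
```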
**Returned values** @@ -1446,12 +1446,12 @@ runningConcurrency(start, end) **Arguments** -- `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `start` — A column with the start time of events. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), or [DateTime64](../data-types/datetime64.md). +- `end` — A column with the end time of events. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), or [DateTime64](../data-types/datetime64.md). **Returned values** -- The number of concurrent events at each event start time. [UInt32](../../sql-reference/data-types/int-uint.md) +- The number of concurrent events at each event start time. [UInt32](../data-types/int-uint.md) **Example** @@ -1515,7 +1515,7 @@ MACStringToOUI(s) ## getSizeOfEnumType -Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). +Returns the number of fields in [Enum](../data-types/enum.md). An exception is thrown if the type is not `Enum`. **Syntax** @@ -1674,7 +1674,7 @@ defaultValueOfArgumentType(expression) - `0` for numbers. - Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `ᴺᵁᴸᴸ` for [Nullable](../data-types/nullable.md). **Example** @@ -1724,7 +1724,7 @@ defaultValueOfTypeName(type) - `0` for numbers. - Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `ᴺᵁᴸᴸ` for [Nullable](../data-types/nullable.md). **Example** @@ -1937,7 +1937,7 @@ filesystemAvailable() **Returned value** -- The amount of remaining space available in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of remaining space available in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -1967,7 +1967,7 @@ filesystemFree() **Returned value** -- The amount of free space in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of free space in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -1997,7 +1997,7 @@ filesystemCapacity() **Returned value** -- Capacity of the filesystem in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- Capacity of the filesystem in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -2017,7 +2017,7 @@ Result: ## initializeAggregation -Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values. +Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). 
You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values. **Syntax** @@ -2027,7 +2027,7 @@ initializeAggregation (aggregate_function, arg1, arg2, ..., argN) **Arguments** -- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md). +- `aggregate_function` — Name of the aggregation function to initialize. [String](../data-types/string.md). - `arg` — Arguments of aggregate function. **Returned value(s)** @@ -2102,7 +2102,7 @@ finalizeAggregation(state) **Arguments** -- `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `state` — State of aggregation. [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction). **Returned value(s)** @@ -2210,8 +2210,8 @@ runningAccumulate(agg_state[, grouping]); **Arguments** -- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). -- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. +- `agg_state` — State of the aggregate function. [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../data-types/index.md) for which the equality operator is defined. **Returned value** @@ -2485,7 +2485,7 @@ getSetting('custom_setting'); **Parameter** -- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md). +- `custom_setting` — The setting name. [String](../data-types/string.md). **Returned value** @@ -2510,7 +2510,7 @@ Result: ## isDecimalOverflow -Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is outside its precision or outside the specified precision. +Checks whether the [Decimal](../data-types/decimal.md) value is outside its precision or outside the specified precision. **Syntax** @@ -2520,8 +2520,8 @@ isDecimalOverflow(d, [p]) **Arguments** -- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). -- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `d` — value. [Decimal](../data-types/decimal.md). +- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../data-types/int-uint.md#uint-ranges). **Returned values** @@ -2557,11 +2557,11 @@ countDigits(x) **Arguments** -- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. +- `x` — [Int](../data-types/int-uint.md) or [Decimal](../data-types/decimal.md) value. **Returned value** -- Number of digits. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges). 
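`initializeAggregation` produces per-row aggregate states that can later be merged; a minimal sketch using `uniqState`:

``` sql
SELECT uniqMerge(state) AS result  -- 3: the distinct values are 0, 1 and 2
FROM
(
    SELECT initializeAggregation('uniqState', number % 3) AS state
    FROM numbers(10000)
);
```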
:::note For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). @@ -2585,7 +2585,7 @@ Result: ## errorCodeToName -- The textual name of an error code. [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). +- The textual name of an error code. [LowCardinality(String)](../data-types/lowcardinality.md). **Syntax** @@ -2616,7 +2616,7 @@ tcpPort() **Returned value** -- The TCP port number. [UInt16](../../sql-reference/data-types/int-uint.md). +- The TCP port number. [UInt16](../data-types/int-uint.md). **Example** @@ -2652,7 +2652,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the current user settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## enabledProfiles @@ -2666,7 +2666,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## defaultProfiles @@ -2680,7 +2680,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## currentRoles @@ -2694,7 +2694,7 @@ currentRoles() **Returned value** -- A list of the current roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- A list of the current roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## enabledRoles @@ -2708,7 +2708,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## defaultRoles @@ -2722,7 +2722,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## getServerPort @@ -2736,7 +2736,7 @@ getServerPort(port_name) **Arguments** -- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: +- `port_name` — The name of the server port. [String](../data-types/string.md#string). Possible values: - 'tcp_port' - 'tcp_port_secure' @@ -2751,7 +2751,7 @@ getServerPort(port_name) **Returned value** -- The number of the server port. [UInt16](../../sql-reference/data-types/int-uint.md). +- The number of the server port. [UInt16](../data-types/int-uint.md). **Example** @@ -2783,7 +2783,7 @@ queryID() **Returned value** -- The ID of the current query. 
[String](../../sql-reference/data-types/string.md) +- The ID of the current query. [String](../data-types/string.md) **Example** @@ -2817,7 +2817,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. [String](../../sql-reference/data-types/string.md) +- The ID of the initial current query. [String](../data-types/string.md) **Example** @@ -2850,7 +2850,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. [UInt32](../../sql-reference/data-types/int-uint.md). +- Shard index or constant `0`. [UInt32](../data-types/int-uint.md). **Example** @@ -2890,7 +2890,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. [UInt32](../../sql-reference/data-types/int-uint.md). +- Total number of shards or `0`. [UInt32](../data-types/int-uint.md). **See Also** @@ -2912,7 +2912,7 @@ getOSKernelVersion() **Returned value** -- The current OS kernel version. [String](../../sql-reference/data-types/string.md). +- The current OS kernel version. [String](../data-types/string.md). **Example** @@ -2946,7 +2946,7 @@ zookeeperSessionUptime() **Returned value** -- Uptime of the current ZooKeeper session in seconds. [UInt32](../../sql-reference/data-types/int-uint.md). +- Uptime of the current ZooKeeper session in seconds. [UInt32](../data-types/int-uint.md). **Example** @@ -2983,7 +2983,7 @@ All arguments must be constant. **Returned value** -- Randomly generated table structure. [String](../../sql-reference/data-types/string.md). +- Randomly generated table structure. [String](../data-types/string.md). **Examples** @@ -3050,7 +3050,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema. [String](../../sql-reference/data-types/string.md). +- CapnProto schema. [String](../data-types/string.md). **Examples** @@ -3149,7 +3149,7 @@ structureToProtobufSchema(structure) **Returned value** -- Protobuf schema. [String](../../sql-reference/data-types/string.md). +- Protobuf schema. [String](../data-types/string.md). **Examples** @@ -3229,11 +3229,11 @@ formatQueryOrNull(query) **Arguments** -- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md) +- `query` - The SQL query to be formatted. [String](../data-types/string.md) **Returned value** -- The formatted query. [String](../../sql-reference/data-types/string.md). +- The formatted query. [String](../data-types/string.md). **Example** @@ -3268,11 +3268,11 @@ formatQuerySingleLineOrNull(query) **Arguments** -- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md) +- `query` - The SQL query to be formatted. [String](../data-types/string.md) **Returned value** -- The formatted query. [String](../../sql-reference/data-types/string.md). +- The formatted query. [String](../data-types/string.md). **Example** @@ -3300,8 +3300,8 @@ variantElement(variant, type_name, [, default_value]) **Arguments** -- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). -- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md). +- `variant` — Variant column. [Variant](../data-types/variant.md). +- `type_name` — The name of the variant type to extract. [String](../data-types/string.md). - `default_value` - The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional. **Returned value** @@ -3337,7 +3337,7 @@ variantType(variant) **Arguments** -- `variant` — Variant column. 
[Variant](../../sql-reference/data-types/variant.md). +- `variant` — Variant column. [Variant](../data-types/variant.md). **Returned value** @@ -3553,7 +3553,7 @@ showCertificate() **Returned value** -- Map of key-value pairs relating to the configured SSL certificate. [Map](../../sql-reference/data-types/map.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Map of key-value pairs relating to the configured SSL certificate. [Map](../data-types/map.md)([String](../data-types/string.md), [String](../data-types/string.md)). **Example** diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index a7866c6d12e..a9b483aa0e5 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -169,7 +169,7 @@ randUniform(min, max) ### Returned value -A random number of type [Float64](/docs/en/sql-reference/data-types/float.md). +A random number of type [Float64](../data-types/float.md). ### Example @@ -204,7 +204,7 @@ randNormal(mean, variance) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -241,7 +241,7 @@ randLogNormal(mean, variance) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -278,7 +278,7 @@ randBinomial(experiments, probability) **Returned value** -- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -315,7 +315,7 @@ randNegativeBinomial(experiments, probability) **Returned value** -- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -351,7 +351,7 @@ randPoisson(n) **Returned value** -- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -387,7 +387,7 @@ randBernoulli(probability) **Returned value** -- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -423,7 +423,7 @@ randExponential(lambda) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -459,7 +459,7 @@ randChiSquared(degree_of_freedom) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -495,7 +495,7 @@ randStudentT(degree_of_freedom) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -532,7 +532,7 @@ randFisherF(d1, d2) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -568,7 +568,7 @@ randomString(length) **Returned value** -- String filled with random bytes. [String](../../sql-reference/data-types/string.md). +- String filled with random bytes. [String](../data-types/string.md). **Example** @@ -604,11 +604,11 @@ randomFixedString(length); **Arguments** -- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). 
+- `length` — String length in bytes. [UInt64](../data-types/int-uint.md). **Returned value(s)** -- String filled with random bytes. [FixedString](../../sql-reference/data-types/fixedstring.md). +- String filled with random bytes. [FixedString](../data-types/fixedstring.md). **Example** @@ -643,7 +643,7 @@ randomPrintableASCII(length) **Returned value** -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [String](../../sql-reference/data-types/string.md) +- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [String](../data-types/string.md) **Example** @@ -671,11 +671,11 @@ randomStringUTF8(length); **Arguments** -- `length` — Length of the string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — Length of the string in code points. [UInt64](../data-types/int-uint.md). **Returned value(s)** -- UTF-8 random string. [String](../../sql-reference/data-types/string.md). +- UTF-8 random string. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 20f73de4410..ab344f664fd 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -36,8 +36,8 @@ Alias: `truncate`. **Parameters** -- `input`: A numeric type ([Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md) or [Integer](/docs/en/sql-reference/data-types/int-uint.md)). -- `precision`: An [Integer](/docs/en/sql-reference/data-types/int-uint.md) type. +- `input`: A numeric type ([Float](../data-types/float.md), [Decimal](../data-types/decimal.md) or [Integer](../data-types/int-uint.md)). +- `precision`: An [Integer](../data-types/int-uint.md) type. **Returned value** @@ -69,7 +69,7 @@ round(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). - `decimal-places` — An integer value. - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. @@ -171,7 +171,7 @@ roundBankers(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). - `decimal-places` — Decimal places. An integer number. - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. 
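Taking the inline examples from the argument description above, a quick sanity check (a sketch; assumes a recent ClickHouse server):

```sql
SELECT
    trunc(423.343, 2)       AS truncated, -- 423.34: trailing digits are dropped, not rounded
    roundBankers(3.55, 1)   AS to_even,   -- 3.6: the tie at the first decimal place goes to the even digit
    roundBankers(24.55, -1) AS tens;      -- 20: rounds at the tens place
```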
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 8aa171949a3..9ec4ee974c4 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -19,13 +19,13 @@ splitByChar(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `separator` — The separator which should contain exactly one character. [String](../data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Empty substrings may be selected when: @@ -72,13 +72,13 @@ splitByString(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `separator` — The separator. [String](../data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Empty substrings may be selected when: @@ -129,13 +129,13 @@ splitByRegexp(regexp, s[, max_substrings])) **Arguments** - `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Empty substrings may be selected when: @@ -186,13 +186,13 @@ splitByWhitespace(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. 
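All `splitBy*` functions in this file share the `max_substrings` behavior described above. A short sketch (illustrative; assumes the default value of the `splitby_max_substrings_includes_remaining_string` setting):

```sql
SELECT
    splitByChar(',', 'a,b,c,d')          AS by_char,   -- ['a','b','c','d']
    splitByChar(',', 'a,b,c,d', 2)       AS capped,    -- ['a','b']: the remainder is dropped by default
    splitByString(', ', '1, 2, 3')       AS by_string, -- ['1','2','3']
    splitByWhitespace('one  two\tthree') AS by_space;  -- ['one','two','three']
```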
**Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -225,13 +225,13 @@ splitByNonAlpha(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -288,12 +288,12 @@ Alias: `splitByAlpha` **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -357,12 +357,12 @@ ngrams(string, ngramsize) **Arguments** -- `string` — String. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `ngramsize` — The size of an n-gram. [UInt](../../sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `ngramsize` — The size of an n-gram. [UInt](../data-types/int-uint.md). **Returned values** -- Array with n-grams. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- Array with n-grams. [Array](../data-types/array.md)([String](../data-types/string.md)). **Example** @@ -384,7 +384,7 @@ Splits a string into tokens using non-alphanumeric ASCII characters as separator **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. 
+- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object. **Returned value** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index f45ceb99617..342ca2b9f03 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -183,7 +183,7 @@ left(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -230,7 +230,7 @@ leftUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -347,7 +347,7 @@ right(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -394,7 +394,7 @@ rightUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -513,11 +513,11 @@ Alias: `lcase` **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Example** @@ -547,11 +547,11 @@ Alias: `ucase` **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Examples** @@ -591,11 +591,11 @@ upperUTF8(input) **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Example** @@ -627,7 +627,7 @@ toValidUTF8(input_string) **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. 
+- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object. **Returned value** @@ -659,8 +659,8 @@ Alias: `REPEAT` **Arguments** -- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). -- `n` — The number of times to repeat the string. [UInt* or Int*](../../sql-reference/data-types/int-uint.md). +- `s` — The string to repeat. [String](../data-types/string.md). +- `n` — The number of times to repeat the string. [UInt* or Int*](../data-types/int-uint.md). **Returned value** @@ -694,7 +694,7 @@ Alias: `SPACE`. **Arguments** -- `n` — The number of times to repeat the space. [UInt* or Int*](../../sql-reference/data-types/int-uint.md). +- `n` — The number of times to repeat the space. [UInt* or Int*](../data-types/int-uint.md). **Returned value** @@ -738,7 +738,7 @@ concat(s1, s2, ...) At least one value of arbitrary type. -Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. +Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** @@ -845,8 +845,8 @@ Alias: `concat_ws` **Arguments** -- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. +- sep — separator. Const [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- exprN — expression to be concatenated. Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** @@ -891,9 +891,9 @@ Alias: **Arguments** -- `s` — The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) -- `offset` — The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). -- `length` — The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. +- `s` — The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -927,9 +927,9 @@ substringUTF8(s, offset[, length]) **Arguments** -- `s`: The string to calculate a substring from. 
[String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) -- `offset`: The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). -- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. +- `s`: The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -965,8 +965,8 @@ Alias: `SUBSTRING_INDEX` **Arguments** -- s: The string to extract substring from. [String](../../sql-reference/data-types/string.md). -- delim: The character to split. [String](../../sql-reference/data-types/string.md). +- s: The string to extract substring from. [String](../data-types/string.md). +- delim: The character to split. [String](../data-types/string.md). - count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Example** @@ -996,13 +996,13 @@ substringIndexUTF8(s, delim, count) **Arguments** -- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md). -- `delim`: The character to split. [String](../../sql-reference/data-types/string.md). +- `s`: The string to extract substring from. [String](../data-types/string.md). +- `delim`: The character to split. [String](../data-types/string.md). - `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Returned value** -A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`. +A substring [String](../data-types/string.md) of `s` before `count` occurrences of `delim`. **Implementation details** @@ -1050,11 +1050,11 @@ base58Encode(plaintext) **Arguments** -- `plaintext` — [String](../../sql-reference/data-types/string.md) column or constant. +- `plaintext` — [String](../data-types/string.md) column or constant. **Returned value** -- A string containing the encoded value of the argument. [String](../../sql-reference/data-types/string.md). +- A string containing the encoded value of the argument. [String](../data-types/string.md). **Example** @@ -1082,7 +1082,7 @@ base58Decode(encoded) **Arguments** -- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. +- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. **Returned value** @@ -1114,7 +1114,7 @@ tryBase58Decode(encoded) **Parameters** -- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. 
If the string is not a valid Base58-encoded value, returns an empty string in case of error.
+- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error.

**Returned value**

@@ -1158,7 +1158,7 @@ tryBase64Decode(encoded)

**Parameters**

-- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error.
+- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string in case of error.

**Examples**

@@ -1257,8 +1257,8 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string)

**Arguments**

-- `trim_character` — Specified characters for trim. [String](../../sql-reference/data-types/string.md).
-- `input_string` — String for trim. [String](../../sql-reference/data-types/string.md).
+- `trim_character` — Specified characters for trim. [String](../data-types/string.md).
+- `input_string` — String for trim. [String](../data-types/string.md).

**Returned value**

@@ -1292,7 +1292,7 @@ Alias: `ltrim(input_string)`.

**Arguments**

-- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
+- `input_string` — string to trim. [String](../data-types/string.md).

**Returned value**

@@ -1326,7 +1326,7 @@ Alias: `rtrim(input_string)`.

**Arguments**

-- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
+- `input_string` — string to trim. [String](../data-types/string.md).

**Returned value**

@@ -1360,7 +1360,7 @@ Alias: `trim(input_string)`.

**Arguments**

-- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md).
+- `input_string` — string to trim. [String](../data-types/string.md).

**Returned value**

@@ -1410,11 +1410,11 @@ normalizeQuery(x)

**Arguments**

-- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
+- `x` — Sequence of characters. [String](../data-types/string.md).

**Returned value**

-- Sequence of characters with placeholders. [String](../../sql-reference/data-types/string.md).
+- Sequence of characters with placeholders. [String](../data-types/string.md).

**Example**

@@ -1442,11 +1442,11 @@ normalizedQueryHash(x)

**Arguments**

-- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
+- `x` — Sequence of characters. [String](../data-types/string.md).

**Returned value**

-- Hash value. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
+- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges).

**Example**

@@ -1474,11 +1474,11 @@ normalizeUTF8NFC(words)

**Arguments**

-- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md).
+- `words` — UTF8-encoded input string. [String](../data-types/string.md).

**Returned value**

-- String transformed to NFC normalization form. [String](../../sql-reference/data-types/string.md).
+- String transformed to NFC normalization form. [String](../data-types/string.md).

**Example**

@@ -1506,11 +1506,11 @@ normalizeUTF8NFD(words)

**Arguments**

-- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md).
+- `words` — UTF8-encoded input string. [String](../data-types/string.md).

**Returned value**

-- String transformed to NFD normalization form. [String](../../sql-reference/data-types/string.md).
+- String transformed to NFD normalization form.
[String](../data-types/string.md). **Example** @@ -1538,11 +1538,11 @@ normalizeUTF8NFKC(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFKC normalization form. [String](../../sql-reference/data-types/string.md). +- String transformed to NFKC normalization form. [String](../data-types/string.md). **Example** @@ -1570,11 +1570,11 @@ normalizeUTF8NFKD(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFKD normalization form. [String](../../sql-reference/data-types/string.md). +- String transformed to NFKD normalization form. [String](../data-types/string.md). **Example** @@ -1605,11 +1605,11 @@ encodeXMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The escaped string. [String](../../sql-reference/data-types/string.md). +- The escaped string. [String](../data-types/string.md). **Example** @@ -1643,11 +1643,11 @@ decodeXMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The un-escaped string. [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../data-types/string.md). **Example** @@ -1677,11 +1677,11 @@ decodeHTMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The un-escaped string. [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../data-types/string.md). **Example** @@ -1730,11 +1730,11 @@ extractTextFromHTML(x) **Arguments** -- `x` — input text. [String](../../sql-reference/data-types/string.md). +- `x` — input text. [String](../data-types/string.md). **Returned value** -- Extracted text. [String](../../sql-reference/data-types/string.md). +- Extracted text. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 0e183626555..7aeb1f5b2a7 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -202,13 +202,13 @@ translateUTF8(s, from, to) **Parameters** -- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md). -- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md). -- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `s`: A string type [String](../data-types/string.md). +- `from`: A string type [String](../data-types/string.md). +- `to`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. 
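The mapping is strictly positional: the i-th character of `from` is replaced by the i-th character of `to`. A brief sketch (illustrative; assumes a recent ClickHouse server):

```sql
SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res;
-- res = 'Munchener Strase': ü -> u, ß -> s, all other characters pass through unchanged.
```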
**Examples**

diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 43b9e621bc0..07f776906e6 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -17,7 +17,7 @@ Functions in this section also assume that the searched string (referred to in t
violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function
variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the
results are undefined. Note that no automatic Unicode normalization is performed, however you can use the
-[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+[normalizeUTF8*()](string-functions.md) functions for that.

[General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately.

@@ -38,12 +38,12 @@ Alias:

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional.
+- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.

**Returned values**

-- Starting position in bytes and counting from 1, if the substring was found. [UInt64](../../sql-reference/data-types/int-uint.md).
-- 0, if the substring was not found. [UInt64](../../sql-reference/data-types/int-uint.md).
+- Starting position in bytes and counting from 1, if the substring was found. [UInt64](../data-types/int-uint.md).
+- 0, if the substring was not found. [UInt64](../data-types/int-uint.md).

If substring `needle` is empty, these rules apply:
- if no `start_pos` was specified: return `1`
@@ -204,7 +204,7 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])

**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
+- `needle` — Substrings to be searched. [Array](../data-types/array.md).

**Returned values**

@@ -239,7 +239,7 @@ multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN

**Parameters**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
+- `needle` — Substrings to be searched. [Array](../data-types/array.md).

**Returned value**

@@ -273,7 +273,7 @@ multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN])

**Parameters**

- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md).
+- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md).
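To make the 1-based, case-sensitive semantics of this family concrete, a short sketch (values mirror the behavior documented above; alias names are arbitrary):

```sql
SELECT
    position('Hello, world!', 'world') AS pos, -- 8: byte offset, counting from 1
    multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']) AS positions;
-- positions = [0, 13, 0]: matching is case-sensitive and misses are reported as 0.
```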
**Returned value** @@ -309,7 +309,7 @@ multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., nee **Parameters** - `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -347,7 +347,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -381,7 +381,7 @@ multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needle **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -415,7 +415,7 @@ multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -451,7 +451,7 @@ multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., ne **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -488,7 +488,7 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -522,7 +522,7 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -556,7 +556,7 @@ multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. 
[Array](../data-types/array.md) **Returned value** @@ -592,7 +592,7 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -630,7 +630,7 @@ multiSearchAny(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -664,7 +664,7 @@ multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -698,7 +698,7 @@ multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -734,7 +734,7 @@ multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -894,12 +894,12 @@ extractAllGroupsHorizontal(haystack, pattern) **Arguments** -- `haystack` — Input string. [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../data-types/string.md). **Returned value** -- Array of arrays of matches. [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../data-types/array.md). :::note If `haystack` does not match the `pattern` regex, an array of empty arrays is returned. @@ -931,12 +931,12 @@ extractAllGroupsVertical(haystack, pattern) **Arguments** -- `haystack` — Input string. [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. 
[String](../../sql-reference/data-types/string.md).
+- `haystack` — Input string. [String](../data-types/string.md).
+- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../data-types/string.md).

**Returned value**

-- Array of arrays of matches. [Array](../../sql-reference/data-types/array.md).
+- Array of arrays of matches. [Array](../data-types/array.md).

:::note
If `haystack` does not match the `pattern` regex, an empty array is returned.
:::

**Example**

@@ -970,7 +970,7 @@ Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which i

If the haystack or the LIKE expression are not valid UTF-8, the behavior is undefined.

-No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](string-functions.md) functions for that.

To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`.
The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`.

@@ -1007,7 +1007,7 @@ Alias: `haystack NOT ILIKE pattern` (operator)

## ngramDistance

-Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other.
+Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other.

Functions [`ngramDistanceCaseInsensitive`](#ngramdistancecaseinsensitive), [`ngramDistanceUTF8`](#ngramdistanceutf8), [`ngramDistanceCaseInsensitiveUTF8`](#ngramdistancecaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.

@@ -1024,7 +1024,7 @@ ngramDistance(haystack, needle)

**Returned value**

-- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64)

**Implementation details**

@@ -1078,7 +1078,7 @@ ngramDistanceCaseInsensitive(haystack, needle)

**Returned value**

-- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64)

**Examples**

@@ -1127,7 +1127,7 @@ ngramDistanceUTF8(haystack, needle)

**Returned value**

-- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+- Value between 0 and 1 representing the similarity between the two strings.
[Float32](../data-types/float.md/#float32-float64) **Example** @@ -1160,7 +1160,7 @@ ngramDistanceCaseInsensitiveUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Example** @@ -1178,7 +1178,7 @@ Result: ## ngramSearch -Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex). +Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex). Functions [`ngramSearchCaseInsensitive`](#ngramsearchcaseinsensitive), [`ngramSearchUTF8`](#ngramsearchutf8), [`ngramSearchCaseInsensitiveUTF8`](#ngramsearchcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. @@ -1195,7 +1195,7 @@ ngramSearch(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) **Implementation details** @@ -1234,7 +1234,7 @@ ngramSearchCaseInsensitive(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1269,7 +1269,7 @@ ngramSearchUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1304,7 +1304,7 @@ ngramSearchCaseInsensitiveUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. 
[Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1338,11 +1338,11 @@ countSubstrings(haystack, needle[, start_pos]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. **Returned values** -- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1385,11 +1385,11 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. **Returned values** -- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1437,11 +1437,11 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. **Returned values** -- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1488,11 +1488,11 @@ countMatches(haystack, pattern) **Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md). **Returned value** -- The number of matches. [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../data-types/int-uint.md). **Examples** @@ -1533,11 +1533,11 @@ countMatchesCaseInsensitive(haystack, pattern) **Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md). **Returned value** -- The number of matches. 
[UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../data-types/int-uint.md). **Examples** @@ -1571,7 +1571,7 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`. - `haystack` — String, in which regexp pattern will to be matched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional. +- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../data-types/int-uint.md). Optional. **Returned values** diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index beb7a0503b9..da8ed1f51ba 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -30,7 +30,7 @@ At least four data points are required in `series` to detect outliers. **Returned value** -- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly. [Array](../../sql-reference/data-types/array.md). +- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly. [Array](../data-types/array.md). **Examples** @@ -79,8 +79,8 @@ seriesPeriodDetectFFT(series); **Returned value** -- A real value equal to the period of series data. [Float64](../../sql-reference/data-types/float.md). -- Returns NAN when number of data points are less than four. [nan](../../sql-reference/data-types/float.md/#nan-and-inf). +- A real value equal to the period of series data. [Float64](../data-types/float.md). +- Returns NAN when number of data points are less than four. [nan](../data-types/float.md/#nan-and-inf). **Examples** @@ -130,7 +130,7 @@ The number of data points in `series` should be at least twice the value of `per **Returned value** - An array of four arrays where the first array include seasonal components, the second array - trend, -the third array - residue component, and the fourth array - baseline(seasonal + trend) component. [Array](../../sql-reference/data-types/array.md). +the third array - residue component, and the fourth array - baseline(seasonal + trend) component. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index 2b5f093c149..2cec1987c20 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -17,8 +17,8 @@ tumble(time_attr, interval [, timezone]) ``` **Arguments** -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. +- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. +- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. 
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** @@ -51,9 +51,9 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. +- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. +- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index b4fa442a637..0663be08240 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -35,7 +35,7 @@ tupleElement(tuple, name, [, default_value]) ## untuple -Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location. +Performs syntactic substitution of [tuple](../data-types/tuple.md#tuplet1-t2) elements in the call location. The names of the result columns are implementation-specific and subject to change. Do not assume specific column names after `untuple`. @@ -49,7 +49,7 @@ You can use the `EXCEPT` expression to skip columns as a result of the query. **Arguments** -- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). +- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../data-types/tuple.md). **Returned value** @@ -111,7 +111,7 @@ Result: **See Also** -- [Tuple](../../sql-reference/data-types/tuple.md) +- [Tuple](../data-types/tuple.md) ## tupleHammingDistance @@ -125,8 +125,8 @@ tupleHammingDistance(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). Tuples should have the same type of the elements. @@ -198,11 +198,11 @@ tupleToNameValuePairs(tuple) **Arguments** -- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. +- `tuple` — Named tuple. [Tuple](../data-types/tuple.md) with any types of values. **Returned value** -- An array with (name, value) pairs. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). +- An array with (name, value) pairs. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), ...)). **Example** @@ -273,12 +273,12 @@ Alias: `vectorSum`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). 
-- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the sum. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the sum. [Tuple](../data-types/tuple.md). **Example** @@ -310,12 +310,12 @@ Alias: `vectorDifference`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of subtraction. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of subtraction. [Tuple](../data-types/tuple.md). **Example** @@ -345,12 +345,12 @@ tupleMultiply(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the multiplication. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the multiplication. [Tuple](../data-types/tuple.md). **Example** @@ -380,12 +380,12 @@ tupleDivide(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of division. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of division. [Tuple](../data-types/tuple.md). **Example** @@ -415,11 +415,11 @@ tupleNegate(tuple) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of negation. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of negation. [Tuple](../data-types/tuple.md). **Example** @@ -449,12 +449,12 @@ tupleMultiplyByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `number` — Multiplier. [Int/UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- Tuple with multiplied values. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with multiplied values. [Tuple](../data-types/tuple.md). **Example** @@ -484,12 +484,12 @@ tupleDivideByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Divider. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `number` — Divider. [Int/UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- Tuple with divided values. 
[Tuple](../../sql-reference/data-types/tuple.md).
+- Tuple with divided values. [Tuple](../data-types/tuple.md).

**Example**

@@ -517,7 +517,7 @@ tupleConcat(tuples)

**Arguments**

-- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type.
+- `tuples` – Arbitrary number of arguments of [Tuple](../data-types/tuple.md) type.

**Example**

diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index f02c8fde06c..d9c18e2a0a2 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -6,7 +6,7 @@ sidebar_label: Maps

## map

-Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types/map.md) data type.
+Arranges `key:value` pairs into [Map(key, value)](../data-types/map.md) data type.

**Syntax**

```sql
map(key1, value1[, key2, value2, ...])
```

**Arguments**

-- `key` — The key part of the pair. Arbitrary type, except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md).
-- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md).
+- `key` — The key part of the pair. Arbitrary type, except [Nullable](../data-types/nullable.md) and [LowCardinality](../data-types/lowcardinality.md) nested with [Nullable](../data-types/nullable.md).
+- `value` — The value part of the pair. Arbitrary type, including [Map](../data-types/map.md) and [Array](../data-types/array.md).

**Returned value**

-- Data structure as `key:value` pairs. [Map(key, value)](../../sql-reference/data-types/map.md).
+- Data structure as `key:value` pairs. [Map(key, value)](../data-types/map.md).

**Examples**

@@ -61,11 +61,11 @@ Result:

**See Also**

-- [Map(key, value)](../../sql-reference/data-types/map.md) data type
+- [Map(key, value)](../data-types/map.md) data type

## mapFromArrays

-Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Notice that the second argument could also be a [Map](../../sql-reference/data-types/map.md), thus it is casted to an Array when executing.
+Merges an [Array](../data-types/array.md) of keys and an [Array](../data-types/array.md) of values into a [Map(key, value)](../data-types/map.md). Notice that the second argument can also be a [Map](../data-types/map.md); in that case it is cast to an Array during execution.

The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.

**Syntax**

```sql
mapFromArrays(keys, values)
```

Alias: `MAP_FROM_ARRAYS(keys, values)`

**Arguments**

-- `keys` — Given key array to create a map from.
The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md)
+- `keys` — Given key array to create a map from. The nested type of the array must be: [String](../data-types/string.md), [Integer](../data-types/int-uint.md), [LowCardinality](../data-types/lowcardinality.md), [FixedString](../data-types/fixedstring.md), [UUID](../data-types/uuid.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [Date32](../data-types/date32.md), [Enum](../data-types/enum.md)
- `values` - Given value array or map to create a map from.

**Returned value**

@@ -109,7 +109,7 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))

## extractKeyValuePairs

-Extracts key-value pairs, i.e. a [Map(String, String)](../../sql-reference/data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).
+Extracts key-value pairs, i.e. a [Map(String, String)](../data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).

A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key-value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported.

**Syntax**

```sql
extractKeyValuePairs(data[, key_value_delimiter[, pair_delimiters[, quoting_character]]])
```

Alias:

**Arguments**

-- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `data` - String to extract key-value pairs from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `key_value_delimiter` - Character to be used as the delimiter between the key and the value. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `pair_delimiters` - Set of characters to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `quoting_character` - Character to be used as the quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).

**Returned values**

-- A [Map(String, String)](../../sql-reference/data-types/map.md) of key-value pairs.
+- A [Map(String, String)](../data-types/map.md) of key-value pairs.
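As a quick sketch of the default delimiters described above (hypothetical input; the formal examples follow below):

```sql
-- Keys and values separated by ':'; pairs separated by ' ', ',' or ';' (the defaults).
SELECT extractKeyValuePairs('name:neymar, age:31 team:psg') AS kv;
-- Expected result: {'name':'neymar','age':'31','team':'psg'}
```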
**Examples**

@@ -221,11 +221,11 @@ mapAdd(arg1, arg2 [, ...])

**Arguments**

-Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promoted to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
+Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../data-types/int-uint.md#int-ranges), [UInt64](../data-types/int-uint.md#uint-ranges) or [Float64](../data-types/float.md#float32-float64)). The common promoted type is used as the type of the result array.

**Returned value**

-- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
+- Depending on the arguments, returns one [map](../data-types/map.md) or [tuple](../data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.

**Example**

@@ -269,11 +269,11 @@ mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])

**Arguments**

-Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
+Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../data-types/int-uint.md#int-ranges), [UInt64](../data-types/int-uint.md#uint-ranges) or [Float64](../data-types/float.md#float32-float64)). The common promoted type is used as the type of the result array.

**Returned value**

-- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
+- Depending on the arguments, returns one [map](../data-types/map.md) or [tuple](../data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.

**Example**

@@ -322,21 +322,21 @@ For array arguments the number of elements in `keys` and `values` must be the sa

**Arguments**

-Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key.
+Arguments are [maps](../data-types/map.md) or two [arrays](../data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.

Mapped arrays:

-- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
-- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
-- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges).
+- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
+- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)).
+- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges).

or

-- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md).
+- `map` — Map with integer keys. [Map](../data-types/map.md).

**Returned value**

-- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
+- Depending on the arguments, returns a [map](../data-types/map.md) or a [tuple](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array): keys in sorted order, and the values corresponding to these keys.

**Example**

@@ -380,12 +380,12 @@ mapContains(map, key)

**Arguments**

-- `map` — Map. [Map](../../sql-reference/data-types/map.md).
+- `map` — Map. [Map](../data-types/map.md).
- `key` — Key. Type matches the type of keys of the `map` parameter.

**Returned value**

-- `1` if `map` contains `key`, `0` if not. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `1` if `map` contains `key`, `0` if not. [UInt8](../data-types/int-uint.md).

**Example**

@@ -413,7 +413,7 @@ Result:

Returns all keys from the `map` parameter.

-Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`.
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [keys](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data.
The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`. **Syntax** @@ -423,11 +423,11 @@ mapKeys(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). **Returned value** -- Array containing all keys from the `map`. [Array](../../sql-reference/data-types/array.md). +- Array containing all keys from the `map`. [Array](../data-types/array.md). **Example** @@ -454,7 +454,7 @@ Result: Returns all values from the `map` parameter. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`. **Syntax** @@ -464,11 +464,11 @@ mapValues(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). **Returned value** -- Array containing all the values from `map`. [Array](../../sql-reference/data-types/array.md). +- Array containing all the values from `map`. [Array](../data-types/array.md). **Example** @@ -500,7 +500,7 @@ mapContainsKeyLike(map, pattern) ``` **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `pattern` - String pattern to match. **Returned value** @@ -538,7 +538,7 @@ mapExtractKeyLike(map, pattern) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `pattern` - String pattern to match. **Returned value** @@ -577,7 +577,7 @@ mapApply(func, map) **Arguments** - `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). -- `map` — [Map](../../sql-reference/data-types/map.md). +- `map` — [Map](../data-types/map.md). **Returned value** @@ -617,7 +617,7 @@ mapFilter(func, map) **Arguments** - `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). -- `map` — [Map](../../sql-reference/data-types/map.md). +- `map` — [Map](../data-types/map.md). **Returned value** @@ -658,8 +658,8 @@ mapUpdate(map1, map2) **Arguments** -- `map1` [Map](../../sql-reference/data-types/map.md). -- `map2` [Map](../../sql-reference/data-types/map.md). +- `map1` [Map](../data-types/map.md). +- `map2` [Map](../data-types/map.md). **Returned value** @@ -691,7 +691,7 @@ mapConcat(maps) **Arguments** -- `maps` – Arbitrary number of arguments of [Map](../../sql-reference/data-types/map.md) type. +- `maps` – Arbitrary number of arguments of [Map](../data-types/map.md) type. 
**Returned value**

diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index f1c2e92f201..d123f317dc6 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -51,7 +51,7 @@ SETTINGS cast_keep_nullable = 1

## toInt(8\|16\|32\|64\|128\|256)

-Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
+Converts an input value to a value of the [Int](../data-types/int-uint.md) data type. This function family includes:

- `toInt8(expr)` — Converts to a value of data type `Int8`.
- `toInt16(expr)` — Converts to a value of data type `Int16`.

**Arguments**

-- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.

**Returned value**

Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` data type.

Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.

-The behavior of functions for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
+The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversion issues](#numeric-conversion-issues) when using the functions.

**Example**

@@ -90,7 +90,7 @@ Result:

## toInt(8\|16\|32\|64\|128\|256)OrZero

-Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`.
+Takes an argument of type [String](../data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`.

**Example**

@@ -151,7 +151,7 @@ Result:

## toUInt(8\|16\|32\|64\|256)

-Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
+Converts an input value to the [UInt](../data-types/int-uint.md) data type. This function family includes:

- `toUInt8(expr)` — Converts to a value of data type `UInt8`.
- `toUInt16(expr)` — Converts to a value of data type `UInt16`.

**Arguments**

-- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped.
+- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number.
Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -169,7 +169,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for negative arguments and for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -203,9 +203,9 @@ Result: ## toDate -Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type. +Converts the argument to [Date](../data-types/date.md) data type. -If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: +If the argument is [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: ```sql SELECT @@ -219,7 +219,7 @@ SELECT └─────────────────────┴───────────────┘ ``` -If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is being used: +If the argument is a [String](../data-types/string.md), it is parsed as [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). If it was parsed as [DateTime](../data-types/datetime.md), the date component is being used: ```sql SELECT @@ -247,7 +247,7 @@ SELECT └────────────┴───────────────────────────────────────────┘ ``` -If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone: +If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](../data-types/datetime.md), then truncated to [Date](../data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](../data-types/date.md) depends on the timezone: ```sql SELECT @@ -276,7 +276,7 @@ date_Samoa_2: 2022-12-31 The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones. 
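To make the timezone dependence concrete, here is a minimal sketch (the dates in the comments assume standard timezone rules; `1685490000` corresponds to 2023-05-30 23:40:00 UTC):

```sql
SELECT
    toDate(1685490000, 'UTC') AS date_utc,          -- 2023-05-30
    toDate(1685490000, 'Asia/Tokyo') AS date_tokyo; -- 2023-05-31, local time is already past midnight
```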
-If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: +If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](../data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: ```sql SELECT toDate(12345) @@ -317,7 +317,7 @@ SELECT ## toDateOrZero -The same as [toDate](#todate) but returns lower boundary of [Date](/docs/en/sql-reference/data-types/date.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDate](#todate) but returns lower boundary of [Date](../data-types/date.md) if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -338,7 +338,7 @@ Result: ## toDateOrNull -The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -359,7 +359,7 @@ Result: ## toDateOrDefault -Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](/docs/en/sql-reference/data-types/date.md). +Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](../data-types/date.md). **Syntax** @@ -386,7 +386,7 @@ Result: ## toDateTime -Converts an input value to [DateTime](/docs/en/sql-reference/data-types/datetime.md). +Converts an input value to [DateTime](../data-types/datetime.md). **Syntax** @@ -396,18 +396,18 @@ toDateTime(expr[, time_zone ]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). -- `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — The value. [String](../data-types/string.md), [Int](../data-types/int-uint.md), [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). +- `time_zone` — Time zone. [String](../data-types/string.md). :::note If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp). -If `expr` is a [String](/docs/en/sql-reference/data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. +If `expr` is a [String](../data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be both a year (an incomplete string representation of Date / DateTime) or a unix timestamp. Longer numeric strings are allowed. ::: **Returned value** -- A date time. [DateTime](/docs/en/sql-reference/data-types/datetime.md) +- A date time. 
[DateTime](../data-types/datetime.md)

**Example**

@@ -428,7 +428,7 @@ Result:

## toDateTimeOrZero

-The same as [toDateTime](#todatetime) but returns lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
+The same as [toDateTime](#todatetime) but returns the lower boundary of [DateTime](../data-types/datetime.md) if an invalid argument is received. Only [String](../data-types/string.md) argument is supported.

**Example**

@@ -449,7 +449,7 @@ Result:

## toDateTimeOrNull

-The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
+The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](../data-types/string.md) argument is supported.

**Example**

@@ -470,7 +470,7 @@ Result:

## toDateTimeOrDefault

-Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md).
+Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](../data-types/datetime.md).

**Syntax**

@@ -497,7 +497,7 @@ Result:

## toDate32

-Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account.
+Converts the argument to the [Date32](../data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](../data-types/date32.md). If the argument has [Date](../data-types/date.md) type, its borders are taken into account.

**Syntax**

```sql
toDate32(expr)
```

**Arguments**

-- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md).
+- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md) or [Date](../data-types/date.md).

**Returned value**

-- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md).
+- A calendar date. Type [Date32](../data-types/date32.md).

**Example**

@@ -539,7 +539,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value);
└────────────┴────────────────────────────────────┘
```

-3. With [Date](/docs/en/sql-reference/data-types/date.md) argument:
+3. With [Date](../data-types/date.md) argument:

``` sql
SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value);
└────────────┴────────────────────────────────────┘
```

## toDate32OrZero

-The same as [toDate32](#todate32) but returns the min value of [Date32](/docs/en/sql-reference/data-types/date32.md) if an invalid argument is received.
+The same as [toDate32](#todate32) but returns the min value of [Date32](../data-types/date32.md) if an invalid argument is received.

**Example**

@@ -593,7 +593,7 @@ Result:

## toDate32OrDefault

-Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type.
If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received.
+Converts the argument to the [Date32](../data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](../data-types/date32.md). If the argument has [Date](../data-types/date.md) type, its borders are taken into account. Returns the default value if an invalid argument is received.

**Example**

@@ -615,7 +615,7 @@ Result:

## toDateTime64

-Converts the argument to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type.
+Converts the argument to the [DateTime64](../data-types/datetime64.md) data type.

**Syntax**

```sql
toDateTime64(expr, scale, [timezone])
```

**Arguments**

-- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md).
+- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md).
- `scale` - Tick size (precision): 10<sup>-precision</sup> seconds. Valid range: [ 0 : 9 ].
- `timezone` - Time zone of the specified datetime64 object.

**Returned value**

-- A calendar date and time of day, with sub-second precision. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md).
+- A calendar date and time of day, with sub-second precision. [DateTime64](../data-types/datetime64.md).

**Example**

@@ -692,7 +692,7 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN

## toDecimal(32\|64\|128\|256)

-Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places.
+Converts `value` to the [Decimal](../data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places.

- `toDecimal32(value, S)`
- `toDecimal64(value, S)`

## toDecimal(32\|64\|128\|256)OrNull

-Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes:
+Converts an input string to a [Nullable(Decimal(P,S))](../data-types/decimal.md) data type value. This family of functions includes:

- `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type.
- `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type.

These functions should be used instead of `toDecimal*()` functions, if you prefe

**Arguments**

-- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`.
+- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type.
ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -755,7 +755,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrDefault -Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: +Converts an input string to a [Decimal(P,S)](../data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. @@ -766,7 +766,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -808,7 +808,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrZero -Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes: +Converts an input value to the [Decimal(P,S)](../data-types/decimal.md) data type. This family of functions includes: - `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. @@ -819,7 +819,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -919,7 +919,7 @@ Also see the `toUnixTimestamp` function. ## toFixedString(s, N) -Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N). +Converts a [String](../data-types/string.md) type argument to a [FixedString(N)](../data-types/fixedstring.md) type (a string of fixed length N). If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown. ## toStringCutToZero(s) @@ -968,14 +968,14 @@ toDecimalString(number, scale) **Arguments** -- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), -- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
- * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), - * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. +- `number` — Value to be represented as String, [Int, UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), +- `scale` — Number of fractional digits, [UInt8](../data-types/int-uint.md). + * Maximum scale for [Decimal](../data-types/decimal.md) and [Int, UInt](../data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), + * Maximum scale for [Float](../data-types/float.md) is 60. **Returned value** -- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). +- Input value represented as [String](../data-types/string.md) with given number of fractional digits (scale). The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. **Example** @@ -1017,7 +1017,7 @@ This function accepts a number or date or date with time and returns a FixedStri ## reinterpretAsUUID :::note -In addition to the UUID functions listed here, there is dedicated [UUID function documentation](/docs/en/sql-reference/functions/uuid-functions.md). +In addition to the UUID functions listed here, there is dedicated [UUID function documentation](../functions/uuid-functions.md). ::: Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. @@ -1030,11 +1030,11 @@ reinterpretAsUUID(fixed_string) **Arguments** -- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). +- `fixed_string` — Big-endian byte string. [FixedString](../data-types/fixedstring.md/#fixedstring). **Returned value** -- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). +- The UUID type value. [UUID](../data-types/uuid.md/#uuid-data-type). **Examples** @@ -1087,7 +1087,7 @@ reinterpret(x, type) **Arguments** - `x` — Any type. -- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). +- `type` — Destination type. [String](../data-types/string.md). **Returned value** @@ -1126,7 +1126,7 @@ x::t **Arguments** - `x` — A value to convert. May be of any type. -- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). +- `T` — The name of the target data type. [String](../data-types/string.md). - `t` — The target data type. **Returned value** @@ -1175,9 +1175,9 @@ Result: └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Conversion to [FixedString (N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Conversion to [FixedString (N)](../data-types/fixedstring.md) only works for arguments of type [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
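A short illustrative sketch; the padding behavior is assumed to mirror `toFixedString` described above, which pads shorter strings with null bytes:

```sql
SELECT CAST('abc' AS FixedString(5)) AS fs, toTypeName(fs);
-- 'abc' should be padded with two null bytes; toTypeName(fs) returns 'FixedString(5)'
```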
-Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported. +Type conversion to [Nullable](../data-types/nullable.md) and back is supported. **Example** @@ -1251,7 +1251,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c ## accurateCastOrNull(x, T) -Converts input value `x` to the specified data type `T`. Always returns [Nullable](/docs/en/sql-reference/data-types/nullable.md) type and returns [NULL](/docs/en/sql-reference/syntax.md/#null-literal) if the casted value is not representable in the target type. +Converts input value `x` to the specified data type `T`. Always returns [Nullable](../data-types/nullable.md) type and returns [NULL](../syntax.md/#null-literal) if the casted value is not representable in the target type. **Syntax** @@ -1360,7 +1360,7 @@ Result: ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) -Converts a Number type argument to an [Interval](/docs/en/sql-reference/data-types/special-data-types/interval.md) data type. +Converts a Number type argument to an [Interval](../data-types/special-data-types/interval.md) data type. **Syntax** @@ -1407,9 +1407,9 @@ Result: ## parseDateTime {#type_conversion_functions-parseDateTime} -Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). +Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). -This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime). +This function is the opposite operation of function [formatDateTime](../functions/date-time-functions.md#date_time_functions-formatDateTime). **Syntax** @@ -1429,7 +1429,7 @@ Returns DateTime values parsed from input string according to a MySQL style form **Supported format specifiers** -All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except: +All format specifiers listed in [formatDateTime](../functions/date-time-functions.md#date_time_functions-formatDateTime) except: - %Q: Quarter (1-4) **Example** @@ -1458,7 +1458,7 @@ Alias: `str_to_date`. Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax. -This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). +This function is the opposite operation of function [formatDateTimeInJodaSyntax](../functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). 
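For orientation before the formal syntax below, a minimal sketch using a common Joda pattern (the timezone argument is omitted, so the server default is assumed):

```sql
SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss') AS dt;
-- Should return the DateTime 2023-02-24 14:53:31
```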
**Syntax**

@@ -1478,7 +1478,7 @@ Returns DateTime values parsed from input string according to a Joda style forma

**Supported format specifiers**

-All format specifiers listed in [formatDateTimeInJoda](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except:
+All format specifiers listed in [formatDateTimeInJoda](../functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except:
- S: fraction of second
- z: time zone
- Z: time zone offset/id

**Example**

@@ -1504,7 +1504,7 @@ Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTime

## parseDateTimeBestEffort
## parseDateTime32BestEffort

-Converts a date and time in the [String](/docs/en/sql-reference/data-types/string.md) representation to [DateTime](/docs/en/sql-reference/data-types/datetime.md/#data_type-datetime) data type.
+Converts a date and time in the [String](../data-types/string.md) representation to [DateTime](../data-types/datetime.md/#data_type-datetime) data type.

The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), ClickHouse’s and some other date and time formats.

**Syntax**

``` sql
parseDateTimeBestEffort(time_string [, time_zone])
```

**Arguments**

-- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md).
-- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md).
+- `time_string` — String containing a date and time to convert. [String](../data-types/string.md).
+- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../data-types/string.md).

**Supported non-standard formats**

@@ -1533,7 +1533,7 @@ If the year is not specified, it is considered to be equal to the current year.

**Returned value**

-- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
+- `time_string` converted to the [DateTime](../data-types/datetime.md) data type.

**Examples**

@@ -1665,7 +1665,7 @@ Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except

## parseDateTime64BestEffort

-Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](/docs/en/sql-reference/functions/type-conversion-functions.md/#data_type-datetime) data type.
+Same as the [parseDateTimeBestEffort](#parsedatetimebesteffort) function, but it also parses milliseconds and microseconds and returns the [DateTime](../functions/type-conversion-functions.md/#data_type-datetime) data type.

**Syntax**

``` sql
parseDateTime64BestEffort(time_string [, precision [, time_zone]])
```

**Arguments**

-- `time_string` — String containing a date or date with time to convert. [String](../data-types/string.md).
+- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](../data-types/datetime.md) data type. **Examples** @@ -1731,7 +1731,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that ## toLowCardinality -Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type. +Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type. To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. @@ -1743,7 +1743,7 @@ toLowCardinality(expr) **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `expr` — [Expression](../syntax.md/#syntax-expressions) resulting in one of the [supported data types](../data-types/index.md/#data_types). **Returned values** @@ -1978,7 +1978,7 @@ Result: ## snowflakeToDateTime -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. +Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format. **Syntax** @@ -1988,12 +1988,12 @@ snowflakeToDateTime(value[, time_zone]) **Arguments** -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). **Returned value** -- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value. +- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value. **Example** @@ -2014,7 +2014,7 @@ Result: ## snowflakeToDateTime64 -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. +Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format. **Syntax** @@ -2024,12 +2024,12 @@ snowflakeToDateTime64(value[, time_zone]) **Arguments** -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). 
The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
+- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
+- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `value` according to the timezone. Optional. [String](../data-types/string.md).
 
 **Returned value**
 
-- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value.
+- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value.
 
 **Example**
 
@@ -2014,7 +2014,7 @@ Result:
 
 ## snowflakeToDateTime64
 
-Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format.
+Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format.
 
 **Syntax**
 
@@ -2024,12 +2024,12 @@ snowflakeToDateTime64(value[, time_zone])
 
 **Arguments**
 
-- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
-- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
+- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
+- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `value` according to the timezone. Optional. [String](../data-types/string.md).
 
 **Returned value**
 
-- The timestamp component of `value` as a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
+- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
 
 **Example**
 
@@ -2050,7 +2050,7 @@ Result:
 
 ## dateTimeToSnowflake
 
-Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.
+Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
 
 **Syntax**
 
@@ -2060,11 +2060,11 @@ dateTimeToSnowflake(value)
 
 **Arguments**
 
-- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md).
+- `value` — Date with time. [DateTime](../data-types/datetime.md).
 
 **Returned value**
 
-- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time.
+- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
 
 **Example**
 
@@ -2084,7 +2084,7 @@ Result:
 
 ## dateTime64ToSnowflake
 
-Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.
+Converts a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
 
 **Syntax**
 
@@ -2094,11 +2094,11 @@ dateTime64ToSnowflake(value)
 
 **Arguments**
 
-- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md).
+- `value` — Date with time. [DateTime64](../data-types/datetime64.md).
 
 **Returned value**
 
-- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time.
+- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
 
 **Example**
 
diff --git a/docs/en/sql-reference/functions/ulid-functions.md b/docs/en/sql-reference/functions/ulid-functions.md
index b4e3fc2d164..dc6a803d638 100644
--- a/docs/en/sql-reference/functions/ulid-functions.md
+++ b/docs/en/sql-reference/functions/ulid-functions.md
@@ -18,7 +18,7 @@ generateULID([x])
 
 **Arguments**
 
-- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter.
+- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../data-types/index.md#data_types).
The resulting value is discarded, but the expression itself is used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter.
 
 **Returned value**
 
@@ -60,12 +60,12 @@ ULIDStringToDateTime(ulid[, timezone])
 
 **Arguments**
 
-- `ulid` — Input ULID. [String](/docs/en/sql-reference/data-types/string.md) or [FixedString(26)](/docs/en/sql-reference/data-types/fixedstring.md).
-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
+- `ulid` — Input ULID. [String](../data-types/string.md) or [FixedString(26)](../data-types/fixedstring.md).
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).
 
 **Returned value**
 
-- Timestamp with milliseconds precision. [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md).
+- Timestamp with milliseconds precision. [DateTime64(3)](../data-types/datetime64.md).
 
 **Usage example**
 
diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md
index cc826b0bba4..130f0147ca1 100644
--- a/docs/en/sql-reference/functions/url-functions.md
+++ b/docs/en/sql-reference/functions/url-functions.md
@@ -28,7 +28,7 @@ domain(url)
 
 **Arguments**
 
-- `url` — URL. [String](../../sql-reference/data-types/string.md).
+- `url` — URL. [String](../data-types/string.md).
 
 The URL can be specified with or without a scheme. Examples:
 
@@ -77,7 +77,7 @@ topLevelDomain(url)
 
 **Arguments**
 
-- `url` — URL. [String](../../sql-reference/data-types/string.md).
+- `url` — URL. [String](../data-types/string.md).
 
 The URL can be specified with or without a scheme. Examples:
 
@@ -89,8 +89,8 @@ https://clickhouse.com/time/
 
 **Returned values**
 
-- Domain name. If ClickHouse can parse the input string as a URL. [String](../../sql-reference/data-types/string.md).
-- Empty string. If ClickHouse cannot parse the input string as a URL. [String](../../sql-reference/data-types/string.md).
+- Domain name. If ClickHouse can parse the input string as a URL. [String](../data-types/string.md).
+- Empty string. If ClickHouse cannot parse the input string as a URL. [String](../data-types/string.md).
 
 **Example**
 
@@ -153,12 +153,12 @@ cutToFirstSignificantSubdomainCustom(URL, TLD)
 
 **Arguments**
 
-- `URL` — URL. [String](../../sql-reference/data-types/string.md).
-- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
+- `URL` — URL. [String](../data-types/string.md).
+- `TLD` — Custom TLD list name. [String](../data-types/string.md).
 
 **Returned value**
 
-- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../../sql-reference/data-types/string.md).
+- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../data-types/string.md).
 
 **Example**
 
@@ -205,12 +205,12 @@ cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD)
 
 **Arguments**
 
-- `URL` — URL. [String](../../sql-reference/data-types/string.md).
-- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
+- `URL` — URL. [String](../data-types/string.md).
+- `TLD` — Custom TLD list name. [String](../data-types/string.md).
**Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../data-types/string.md). **Example** @@ -257,12 +257,12 @@ firstSignificantSubdomainCustom(URL, TLD) **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../data-types/string.md). +- `TLD` — Custom TLD list name. [String](../data-types/string.md). **Returned value** -- First significant subdomain. [String](../../sql-reference/data-types/string.md). +- First significant subdomain. [String](../data-types/string.md). **Example** @@ -408,7 +408,7 @@ netloc(URL) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../data-types/string.md). **Returned value** @@ -462,8 +462,8 @@ cutURLParameter(URL, name) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). -- `name` — name of URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings. +- `url` — URL. [String](../data-types/string.md). +- `name` — name of URL parameter. [String](../data-types/string.md) or [Array](../data-types/array.md) of Strings. **Returned value** diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index a16663afc5b..a4e4037eedc 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -18,7 +18,7 @@ generateUUIDv4([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -90,7 +90,7 @@ generateUUIDv7([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. 
**Returned value** @@ -163,7 +163,7 @@ generateUUIDv7ThreadMonotonic([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -233,7 +233,7 @@ generateUUIDv7NonMonotonic([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -379,8 +379,8 @@ Result: **Arguments** -- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string). -- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md). +- `string` — String of 36 characters or FixedString(36). [String](../syntax.md#string). +- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](../data-types/uuid.md). **Returned value** @@ -478,7 +478,7 @@ Result: ## UUIDStringToNum -Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). +Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). **Syntax** @@ -488,7 +488,7 @@ UUIDStringToNum(string[, variant = 1]) **Arguments** -- `string` — A [String](../../sql-reference/syntax.md#syntax-string-literal) of 36 characters or [FixedString](../../sql-reference/syntax.md#syntax-string-literal) +- `string` — A [String](../syntax.md#syntax-string-literal) of 36 characters or [FixedString](../syntax.md#syntax-string-literal) - `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -537,7 +537,7 @@ UUIDNumToString(binary[, variant = 1]) **Arguments** -- `binary` — [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as a binary representation of a UUID. +- `binary` — [FixedString(16)](../data-types/fixedstring.md) as a binary representation of a UUID. 
- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -576,7 +576,7 @@ Result: ## UUIDToNum -Accepts a [UUID](../../sql-reference/data-types/uuid.md) and returns its binary representation as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md), with its format optionally specified by `variant` (`Big-endian` by default). This function replaces calls to two separate functions `UUIDStringToNum(toString(uuid))` so no intermediate conversion from UUID to string is required to extract bytes from a UUID. +Accepts a [UUID](../data-types/uuid.md) and returns its binary representation as a [FixedString(16)](../data-types/fixedstring.md), with its format optionally specified by `variant` (`Big-endian` by default). This function replaces calls to two separate functions `UUIDStringToNum(toString(uuid))` so no intermediate conversion from UUID to string is required to extract bytes from a UUID. **Syntax** @@ -636,11 +636,11 @@ UUIDv7ToDateTime(uuid[, timezone]) **Arguments** - `uuid` — [UUID](../data-types/uuid.md) of version 7. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. [DateTime64(3)](../data-types/datetime64.md). **Usage examples** @@ -684,4 +684,4 @@ serverUUID() ## See also -- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) +- [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 043686889c4..03251f0b9af 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -432,13 +432,13 @@ regionIn(lhs, rhs\[, geobase\]) **Parameters** -- `lhs` — Lhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). -- `rhs` — Rhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). +- `lhs` — Lhs region ID from the geobase. [UInt32](../data-types/int-uint). +- `rhs` — Rhs region ID from the geobase. [UInt32](../data-types/int-uint). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. **Returned value** -- 1, if it belongs. [UInt8](../../sql-reference/data-types/int-uint). +- 1, if it belongs. [UInt8](../data-types/int-uint). - 0, if it doesn't belong. 
 **Implementation details**
 
From e87c168bd86a0697621b5692f80b1f64e40337a5 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Fri, 24 May 2024 06:42:13 +0200
Subject: [PATCH 588/651] Turn multi-line returns into a single line

---
 .../sql-reference/functions/introspection.md  | 13 ++---
 .../functions/splitting-merging-functions.md  |  3 +-
 .../functions/string-search-functions.md      | 58 +++++++------------
 .../functions/time-series-functions.md        |  3 +-
 .../sql-reference/functions/url-functions.md  |  6 +-
 5 files changed, 31 insertions(+), 52 deletions(-)

diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md
index 540e148e3f1..5dc57e70591 100644
--- a/docs/en/sql-reference/functions/introspection.md
+++ b/docs/en/sql-reference/functions/introspection.md
@@ -112,9 +112,11 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
 
 ## addressToLineWithInlines
 
-Similar to `addressToLine`, but it will return an Array with all inline functions, and will be much slower as a price.
+Similar to `addressToLine`, but returns an Array with all inline functions. As a result of this, it is slower than `addressToLine`.
 
+:::note
 If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.
+:::
 
 **Syntax**
 
@@ -128,11 +130,7 @@ addressToLineWithInlines(address_of_binary_instruction)
 
 **Returned value**
 
-- Array which first element is source code filename and the line number in this file delimited by colon. And from second element, inline functions' source code filename and line number and function name are listed.
-
-- Array with single element which is name of a binary, if the function couldn’t find the debug information.
-
-- Empty array, if the address is not valid. [Array(String)](../data-types/array.md).
+- An array whose first element is the source code filename and line number in the file delimited by a colon. From the second element onwards, inline functions' source code filenames, line numbers and function names are listed. If the function couldn’t find the debug information, an array with a single element equal to the name of the binary is returned; if the address is not valid, an empty array is returned. [Array(String)](../data-types/array.md).
 
 **Example**
 
@@ -324,8 +322,7 @@ demangle(symbol)
 
 **Returned value**
 
-- Name of the C++ function. [String](../data-types/string.md).
-- Empty string if a symbol is not valid. [String](../data-types/string.md).
+- Name of the C++ function, or an empty string if the symbol is not valid. [String](../data-types/string.md).
 
 **Example**
 
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md
index 9ec4ee974c4..a3c28504a29 100644
--- a/docs/en/sql-reference/functions/splitting-merging-functions.md
+++ b/docs/en/sql-reference/functions/splitting-merging-functions.md
@@ -328,8 +328,7 @@ extractAllGroups(text, regexp)
 
 **Returned values**
 
-- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). [Array](../data-types/array.md).
-- If there is no matching group, returns an empty array. [Array](../data-types/array.md).
+- If the function finds at least one matching group, it returns an `Array(Array(String))` column, clustered by group_id (1 to N, where N is the number of capturing groups in `regexp`). If there is no matching group, it returns an empty array.
[Array](../data-types/array.md).
 
 **Example**
 
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 07f776906e6..d261cff3580 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -40,7 +40,7 @@ Alias:
 - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
 
-**Returned values**
+**Returned value**
 
 - Starting position in bytes and counting from 1, if the substring was found. [UInt64](../data-types/int-uint.md).
 - 0, if the substring was not found. [UInt64](../data-types/int-uint.md).
@@ -206,7 +206,7 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
 - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `needle` — Substrings to be searched. [Array](../data-types/array.md).
 
-**Returned values**
+**Returned value**
 
 - Array of the starting position in bytes and counting from 1, if the substring was found.
 - 0, if the substring was not found.
@@ -492,8 +492,7 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN])
 
 **Returned value**
 
-- index (starting from 1) of the leftmost found needle.
-- 0, if there was no match.
+- index (starting from 1) of the leftmost found needle, or 0 if there was no match. [UInt8](../data-types/int-uint.md).
 
 **Example**
 
@@ -526,8 +525,7 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
 
 **Returned value**
 
-- index (starting from 1) of the leftmost found needle.
-- 0, if there was no match.
+- index (starting from 1) of the leftmost found needle, or 0 if there was no match. [UInt8](../data-types/int-uint.md).
 
 **Example**
 
@@ -560,8 +558,7 @@ multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN])
 
 **Returned value**
 
-- index (starting from 1) of the leftmost found needle.
-- 0, if there was no match.
+- index (starting from 1) of the leftmost found needle, or 0 if there was no match. [UInt8](../data-types/int-uint.md).
 
 **Example**
 
@@ -596,8 +593,7 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl
 
 **Returned value**
 
-- index (starting from 1) of the leftmost found needle.
-- 0, if there was no match.
+- index (starting from 1) of the leftmost found needle, or 0 if there was no match. [UInt8](../data-types/int-uint.md).
 
 **Example**
 
@@ -1340,7 +1336,7 @@ countSubstrings(haystack, needle[, start_pos])
 - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
 
-**Returned values**
+**Returned value**
 
 - The number of occurrences. [UInt64](../data-types/int-uint.md).
 
@@ -1387,7 +1383,7 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos])
 - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
 
-**Returned values**
+**Returned value**
 
 - The number of occurrences. [UInt64](../data-types/int-uint.md).
@@ -1439,7 +1435,7 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos])
 - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
 
-**Returned values**
+**Returned value**
 
 - The number of occurrences. [UInt64](../data-types/int-uint.md).
 
@@ -1573,7 +1569,7 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.
 - `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../data-types/int-uint.md). Optional.
 
-**Returned values**
+**Returned value**
 
 `pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. [String](../data-types/string.md).
 
@@ -1612,10 +1608,9 @@ hasSubsequence(haystack, needle)
 - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-**Returned values**
+**Returned value**
 
-- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md).
-- 0, otherwise. [UInt8](../data-types/int-uint.md).
+- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md).
 
 **Examples**
 
@@ -1648,10 +1643,9 @@ hasSubsequenceCaseInsensitive(haystack, needle)
 - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-**Returned values**
+**Returned value**
 
-- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md).
-- 0, otherwise. [UInt8](../data-types/int-uint.md).
+- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md).
 
 **Examples**
 
@@ -1684,10 +1678,9 @@ hasSubsequenceUTF8(haystack, needle)
 - `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-**Returned values**
+**Returned value**
 
-- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md).
-- 0, otherwise. [UInt8](../data-types/int-uint.md).
+- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md).
 
 Query:
 
@@ -1720,10 +1713,9 @@ hasSubsequenceCaseInsensitiveUTF8(haystack, needle)
 - `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
 - `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-**Returned values**
+**Returned value**
 
-- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md).
-- 0, otherwise. [UInt8](../data-types/int-uint.md).
+- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md).
 
 **Examples**
 
@@ -1758,8 +1750,7 @@ hasToken(haystack, token)
 
 **Returned value**
 
-- 1, if the token is present in the haystack.
-- 0, if the token is not present.
+- 1, if the token is present in the haystack, 0 otherwise. [UInt8](../data-types/int-uint.md).
 
 **Implementation details**
 
@@ -1794,9 +1785,7 @@ hasTokenOrNull(haystack, token)
 
 **Returned value**
 
-- 1, if the token is present in the haystack.
-- 0, if the token is not present in the haystack.
-- null, if the token is ill-formed.
+- 1, if the token is present in the haystack, 0 if it is not present, and null if the token is ill-formed.
 
 **Implementation details**
 
@@ -1833,8 +1822,7 @@ hasTokenCaseInsensitive(haystack, token)
 
 **Returned value**
 
-- 1, if the token is present in the haystack.
-- 0, otherwise.
+- 1, if the token is present in the haystack, 0 otherwise. [UInt8](../data-types/int-uint.md).
 
 **Implementation details**
 
@@ -1869,9 +1857,7 @@ hasTokenCaseInsensitiveOrNull(haystack, token)
 
 **Returned value**
 
-- 1, if the token is present in the haystack.
-- 0, if token is not present.
-- null, if the token is ill-formed.
+- 1, if the token is present in the haystack, 0 if the token is not present, or [`null`](../data-types/nullable.md) if the token is ill-formed. [UInt8](../data-types/int-uint.md).
 
 **Implementation details**
 
diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md
index da8ed1f51ba..ce5dea14ec5 100644
--- a/docs/en/sql-reference/functions/time-series-functions.md
+++ b/docs/en/sql-reference/functions/time-series-functions.md
@@ -79,8 +79,7 @@ seriesPeriodDetectFFT(series);
 
 **Returned value**
 
-- A real value equal to the period of series data. [Float64](../data-types/float.md).
-- Returns NAN when number of data points are less than four. [nan](../data-types/float.md/#nan-and-inf).
+- A real value equal to the period of series data. NaN when the number of data points is less than four. [Float64](../data-types/float.md).
 
 **Examples**
 
diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md
index 130f0147ca1..47890e0b271 100644
--- a/docs/en/sql-reference/functions/url-functions.md
+++ b/docs/en/sql-reference/functions/url-functions.md
@@ -48,8 +48,7 @@ clickhouse.com
 
 **Returned values**
 
-- Host name. If ClickHouse can parse the input string as a URL. [String](../data-types/string.md).
-- Empty string. If ClickHouse can’t parse the input string as a URL. [String](../data-types/string.md).
+- Host name if ClickHouse can parse the input string as a URL, otherwise an empty string. [String](../data-types/string.md).
 
 **Example**
 
@@ -89,8 +88,7 @@ https://clickhouse.com/time/
 
 **Returned values**
 
-- Domain name. If ClickHouse can parse the input string as a URL. [String](../data-types/string.md).
-- Empty string. If ClickHouse cannot parse the input string as a URL. [String](../data-types/string.md).
+- Domain name if ClickHouse can parse the input string as a URL, otherwise an empty string. [String](../data-types/string.md).
**Example** From 67ff6883fd11422231d029cf5a128dd5b87dbdfa Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 06:51:38 +0200 Subject: [PATCH 589/651] Restore original formatting for logical functions and, or, not, xor --- .../functions/logical-functions.md | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 8448dd4ff12..7222dbeeb0d 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -30,9 +30,11 @@ Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-ope **Returned value** -- `0`, if at least one argument evaluates to `false`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`. [NULL](../../sql-reference/syntax.md/#null). -- `1`, otherwise. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). +- `0`, if at least one argument evaluates to `false`, +- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`, +- `1`, otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -86,7 +88,7 @@ Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-opera - `0`, if all arguments evaluate to `false`, - `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`. -Type: [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -134,9 +136,11 @@ Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-ne **Returned value** -- `1`, if `val` evaluates to `false`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `0`, if `val` evaluates to `true`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `NULL`, if `val` is `NULL`. [NULL](../../sql-reference/syntax.md/#null). +- `1`, if `val` evaluates to `false`, +- `0`, if `val` evaluates to `true`, +- `NULL`, if `val` is `NULL`. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -168,9 +172,11 @@ xor(val1, val2...) **Returned value** -- `1`, for two values: if one of the values evaluates to `false` and other does not. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `0`, for two values: if both values evaluate to `false` or to both `true`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `NULL`, if at least one of the inputs is `NULL`. [NULL](../../sql-reference/syntax.md/#null). 
+- `1`, for two values: if one of the values evaluates to `false` and the other does not,
+- `0`, for two values: if both values evaluate to `false` or both to `true`,
+- `NULL`, if at least one of the inputs is `NULL`.
+
+Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
 
 **Example**
 
From 3071909aca68d73b0e29660896f883ff759ef48e Mon Sep 17 00:00:00 2001
From: Blargian
Date: Fri, 24 May 2024 07:00:47 +0200
Subject: [PATCH 590/651] Revert roundAge to original formatting

---
 .../sql-reference/functions/rounding-functions.md | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index ab344f664fd..c2998a82205 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -328,14 +328,15 @@ roundAge(num)
 
 **Returned value**
 
-- Returns `0`, for $age \lt 1$. [UInt8](../data-types/int-uint.md).
-- Returns `17`, for $1 \leq age \leq 17$. [UInt8](../data-types/int-uint.md).
-- Returns `18`, for $18 \leq age \leq 24$. [UInt8](../data-types/int-uint.md).
-- Returns `25`, for $25 \leq age \leq 34$. [UInt8](../data-types/int-uint.md).
-- Returns `35`, for $35 \leq age \leq 44$. [UInt8](../data-types/int-uint.md).
-- Returns `45`, for $45 \leq age \leq 54$. [UInt8](../data-types/int-uint.md).
-- Returns `55`, for $age \geq 55$. [UInt8](../data-types/int-uint.md).
+- Returns `0`, for $age \lt 1$.
+- Returns `17`, for $1 \leq age \leq 17$.
+- Returns `18`, for $18 \leq age \leq 24$.
+- Returns `25`, for $25 \leq age \leq 34$.
+- Returns `35`, for $35 \leq age \leq 44$.
+- Returns `45`, for $45 \leq age \leq 54$.
+- Returns `55`, for $age \geq 55$.
 
+Type: [UInt8](../data-types/int-uint.md).
 **Example**
 
 Query:
From b19c5ad13ac56d0e2cf6d0b5361ef7992b18e29b Mon Sep 17 00:00:00 2001
From: Blargian
Date: Fri, 24 May 2024 07:01:15 +0200
Subject: [PATCH 591/651] Revert roundAge to original formatting

---
 docs/en/sql-reference/functions/rounding-functions.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index c2998a82205..d18185c5013 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -337,6 +337,7 @@ roundAge(num)
 - Returns `55`, for $age \geq 55$.
 
 Type: [UInt8](../data-types/int-uint.md).
+
 **Example**
 
 Query:
From 8783647703ec60eb936824c0265a298a33e9ae43 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Fri, 24 May 2024 07:03:15 +0200
Subject: [PATCH 592/651] Revert addressToLine to original formatting

---
 docs/en/sql-reference/functions/introspection.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md
index 5dc57e70591..bec97208843 100644
--- a/docs/en/sql-reference/functions/introspection.md
+++ b/docs/en/sql-reference/functions/introspection.md
@@ -40,10 +40,12 @@ addressToLine(address_of_binary_instruction)
 
 **Returned value**
 
-- Source code filename and the line number in this file delimited by colon. [String](../data-types/string.md).
-
-    For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number.
-- Name of a binary, if the function couldn’t find the debug information. [String](../data-types/string.md). -- Empty string, if the address is not valid. [String](../data-types/string.md). +- Source code filename and the line number in this file delimited by colon. + For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. +- Name of a binary, if the function couldn’t find the debug information. +- Empty string, if the address is not valid. + +Type: [String](../../sql-reference/data-types/string.md). **Example** From c638de90c2d6e0a2aa48d2eadd763ad7aa47e3a7 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 10:01:06 +0200 Subject: [PATCH 593/651] Fix incorrectly placed :::note blocks --- .../sql-reference/functions/splitting-merging-functions.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index a3c28504a29..20d63d84628 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -27,13 +27,11 @@ splitByChar(separator, s[, max_substrings])) - An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). -:::note Empty substrings may be selected when: - A separator occurs at the beginning or end of the string; - There are multiple consecutive separators; - The original string `s` is empty. -::: :::note The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings > 0` meant that `max_substring`-many splits were performed and that the remainder of the string was returned as the final element of the list. @@ -80,13 +78,13 @@ splitByString(separator, s[, max_substrings])) - An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). -:::note Empty substrings may be selected when: - A non-empty separator occurs at the beginning or end of the string; - There are multiple consecutive non-empty separators; - The original string `s` is empty while the separator is not empty. +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. ::: @@ -137,13 +135,14 @@ splitByRegexp(regexp, s[, max_substrings])) - An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). -:::note + Empty substrings may be selected when: - A non-empty regular expression match occurs at the beginning or end of the string; - There are multiple consecutive non-empty regular expression matches; - The original string `s` is empty while the regular expression is not empty. +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. 
::: From 480f911c7664c15cccf913b0b7cc3d66645c557c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 08:33:44 +0000 Subject: [PATCH 594/651] Fix spelling --- .../aspell-ignore/en/aspell-dict.txt | 117 +++++++++--------- 1 file changed, 59 insertions(+), 58 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 1c601bc200a..6df2e426561 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -6,6 +6,7 @@ AMPLab AMQP ANNIndex ANNIndexes +ANOVA AORM APIs ARMv @@ -29,13 +30,6 @@ Alexey AnyEvent AppleClang Approximative -arrayDotProduct -arrayEnumerateDenseRanked -arrayEnumerateUniqRanked -arrayFirstOrNull -arrayLastOrNull -arrayPartialShuffle -arrayShuffle ArrayJoin ArrowStream AsyncInsertCacheSize @@ -53,8 +47,6 @@ AutoFDO AutoML Autocompletion AvroConfluent -analysisOfVariance -ANOVA BIGINT BIGSERIAL BORO @@ -186,7 +178,6 @@ ComplexKeyCache ComplexKeyDirect ComplexKeyHashed Composable -composable Config ConnectionDetails Const @@ -396,8 +387,6 @@ InterserverThreads IsPentagon IsResClassIII IsValid -isNotDistinctFrom -isNullable JBOD JOINed JOINs @@ -466,8 +455,6 @@ KittenHouse Klickhouse Kolmogorov Konstantin -kostik -kostikConsistentHash Korzeniewski Kubernetes LDAP @@ -477,9 +464,8 @@ LLDB LLVM's LOCALTIME LOCALTIMESTAMP -LOONGARCH LONGLONG -LoongArch +LOONGARCH Levenshtein Liao LibFuzzer @@ -497,6 +483,7 @@ LocalThreadActive LogQL Logstash LookML +LoongArch LowCardinality LpDistance LpNorm @@ -571,17 +558,6 @@ MindsDB Mongodb Monotonicity MsgPack -multiSearchAllPositionsCaseInsensitive -multiSearchAllPositionsCaseInsensitiveUTF -multiSearchAnyCaseInsensitive -multiSearchAnyCaseInsensitiveUTF -multiSearchAnyUTF -multiSearchFirstIndexCaseInsensitive -multiSearchFirstIndexCaseInsensitiveUTF -multiSearchFirstIndexUTF -multiSearchFirstPositionCaseInsensitive -multiSearchFirstPositionCaseInsensitiveUTF -multiSearchFirstPositionUTF MultiPolygon Multiline Multiqueries @@ -683,8 +659,8 @@ OSUserTimeNormalized OTLP OUTFILE ObjectId -Observability Oblakov +Observability Octonica Ok OnTime @@ -765,7 +741,6 @@ Promtail Protobuf ProtobufSingle ProxySQL -proportionsZTest Punycode PyArrow PyCharm @@ -886,7 +861,6 @@ Simhash SimpleAggregateFunction SimpleState SipHash -sigmoid Smirnov's Smirnov'test Soundex @@ -932,7 +906,6 @@ TAVG TCPConnection TCPThreads TDigest -ThreadMonotonic TINYINT TLSv TMAX @@ -958,7 +931,6 @@ TablesLoaderForegroundThreads TablesLoaderForegroundThreadsActive TablesToDropQueueSize TargetSpecific -tanh Telegraf TemplateIgnoreSpaces TemporaryFilesForAggregation @@ -968,6 +940,7 @@ TemporaryFilesUnknown Testflows Tgz Theil's +ThreadMonotonic ThreadPoolFSReaderThreads ThreadPoolFSReaderThreadsActive ThreadPoolRemoteFSReaderThreads @@ -1028,7 +1001,6 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unshuffled Updatable Uppercased Uptime @@ -1095,6 +1067,7 @@ activerecord addDate addDays addHours +addInterval addMicroseconds addMilliseconds addMinutes @@ -1102,10 +1075,9 @@ addMonths addNanoseconds addQuarters addSeconds +addTupleOfIntervals addWeeks addYears -addInterval -addTupleOfIntervals addr addressToLine addressToLineWithInlines @@ -1120,6 +1092,7 @@ aiochclient allocator alphaTokens amplab +analysisOfVariance analytics anonymize anonymized @@ -1147,15 +1120,19 @@ arrayCumSum arrayCumSumNonNegative arrayDifference arrayDistinct +arrayDotProduct arrayElement arrayEnumerate 
arrayEnumerateDense +arrayEnumerateDenseRanked arrayEnumerateUniq +arrayEnumerateUniqRanked arrayExists arrayFill arrayFilter arrayFirst arrayFirstIndex +arrayFirstOrNull arrayFlatten arrayFold arrayIntersect @@ -1163,10 +1140,12 @@ arrayJaccardIndex arrayJoin arrayLast arrayLastIndex +arrayLastOrNull arrayMap arrayMax arrayMin arrayPartialReverseSort +arrayPartialShuffle arrayPartialSort arrayPopBack arrayPopFront @@ -1186,6 +1165,7 @@ arrayRotateRight arrayShiftLeft arrayShiftRight arrayShingles +arrayShuffle arraySlice arraySort arraySplit @@ -1367,6 +1347,7 @@ collapsingmergetree combinator combinators comparising +composable compressability concat concatAssumeInjective @@ -1728,8 +1709,8 @@ hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF hasSubstr -hasToken hasThreadFuzzer +hasToken hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull @@ -1802,8 +1783,10 @@ isIPAddressInRange isIPv isInfinite isNaN +isNotDistinctFrom isNotNull isNull +isNullable isValidJSON isValidUTF isZeroOrNull @@ -1855,6 +1838,8 @@ kolmogorovSmirnovTest kolmogorovsmirnovtest kolya konsole +kostik +kostikConsistentHash kurtPop kurtSamp kurtosis @@ -1866,9 +1851,9 @@ laravel largestTriangleThreeBuckets latencies ldap -leftUTF leftPad leftPadUTF +leftUTF lemmatization lemmatize lemmatized @@ -1915,8 +1900,8 @@ logTrace logagent loghouse london -loongarch lookups +loongarch lowcardinality lowerUTF lowercased @@ -1987,8 +1972,8 @@ mispredictions mmap mmapped modularization -moduloOrZero moduli +moduloOrZero mongodb monotonicity monthName @@ -2005,10 +1990,21 @@ multiMatchAllIndices multiMatchAny multiMatchAnyIndex multiSearchAllPositions +multiSearchAllPositionsCaseInsensitive +multiSearchAllPositionsCaseInsensitiveUTF multiSearchAllPositionsUTF multiSearchAny +multiSearchAnyCaseInsensitive +multiSearchAnyCaseInsensitiveUTF +multiSearchAnyUTF multiSearchFirstIndex +multiSearchFirstIndexCaseInsensitive +multiSearchFirstIndexCaseInsensitiveUTF +multiSearchFirstIndexUTF multiSearchFirstPosition +multiSearchFirstPositionCaseInsensitive +multiSearchFirstPositionCaseInsensitiveUTF +multiSearchFirstPositionUTF multibyte multidirectory multiline @@ -2094,6 +2090,7 @@ ok omclickhouse onstraints ontime +onwards openSSL openSUSE openldap @@ -2205,6 +2202,7 @@ procfs profiler proleptic prometheus +proportionsZTest proto protobuf protobufsingle @@ -2343,8 +2341,8 @@ retentions rethrow retransmit retriable -rewritable reverseUTF +rewritable rightPad rightPadUTF rightUTF @@ -2404,8 +2402,9 @@ sharded sharding shortcircuit shortkeys -showCertificate shoutout +showCertificate +sigmoid simdjson simpleJSON simpleJSONExtractBool @@ -2419,8 +2418,8 @@ simpleLinearRegression simpleaggregatefunction simplelinearregression simpod -singlepart singleValueOrNull +singlepart singlevalueornull sinh sipHash @@ -2465,13 +2464,13 @@ statbox stateful stddev stddevPop -stddevSamp -stddevpop -stddevsamp -stddevpopstable stddevPopStable -stddevsampstable +stddevSamp stddevSampStable +stddevpop +stddevpopstable +stddevsamp +stddevsampstable stderr stdin stdout @@ -2532,6 +2531,7 @@ substrings subtitiles subtractDays subtractHours +subtractInterval subtractMicroseconds subtractMilliseconds subtractMinutes @@ -2539,10 +2539,9 @@ subtractMonths subtractNanoseconds subtractQuarters subtractSeconds +subtractTupleOfIntervals subtractWeeks subtractYears -subtractInterval -subtractTupleOfIntervals subtree subtrees subtype @@ -2551,13 +2550,13 @@ sumCount sumKahan sumMap sumMapFiltered +sumMapFilteredWithOverflow 
+sumMapWithOverflow sumWithOverflow sumcount sumkahan summap summapwithoverflow -sumMapWithOverflow -sumMapFilteredWithOverflow summingmergetree sumwithoverflow superaggregates @@ -2580,6 +2579,7 @@ tabseparatedrawwithnames tabseparatedrawwithnamesandtypes tabseparatedwithnames tabseparatedwithnamesandtypes +tanh tcp tcpPort tcpnodelay @@ -2714,18 +2714,18 @@ tupleDivide tupleDivideByNumber tupleElement tupleHammingDistance +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber tupleMinus +tupleModulo +tupleModuloByNumber tupleMultiply tupleMultiplyByNumber tupleNegate tuplePlus tupleToNameValuePairs -tupleIntDiv -tupleIntDivByNumber -tupleIntDivOrZero -tupleIntDivOrZeroByNumber -tupleModulo -tupleModuloByNumber turbostat txt typename @@ -2769,6 +2769,7 @@ unrealiable unreplicated unresolvable unrounded +unshuffled untracked untrusted untuple @@ -2779,8 +2780,8 @@ uptime uptrace uring url -urlencoded urlCluster +urlencoded urls usearch userspace From 3e21ff92a38ece0b0ebcf72554e45d33ce612771 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 10:53:19 +0200 Subject: [PATCH 595/651] CI: master workflow with folded jobs --- .github/workflows/master.yml | 825 ++--------------------------- .github/workflows/merge_queue.yml | 6 +- .github/workflows/pull_request.yml | 22 +- 3 files changed, 62 insertions(+), 791 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index d2ea714e4e4..11ec484d208 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -53,13 +53,13 @@ jobs: - name: Re-create GH statuses for skipped jobs if any run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses - BuildDockers: - needs: [RunConfig] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml - with: - data: ${{ needs.RunConfig.outputs.data }} - # Tested in MQ +# Runs in MQ: +# BuildDockers: +# needs: [RunConfig] +# if: ${{ !failure() && !cancelled() }} +# uses: ./.github/workflows/reusable_docker.yml +# with: +# data: ${{ needs.RunConfig.outputs.data }} # StyleCheck: # needs: [RunConfig, BuildDockers] # if: ${{ !failure() && !cancelled() }} @@ -70,262 +70,73 @@ jobs: # data: ${{ needs.RunConfig.outputs.data }} # run_command: | # python3 style_check.py --no-push - CompatibilityCheckX86: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml + + ################################# Main stages ################################# + # for main CI chain + # + Builds_1: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} + # using callable wf (reusable_stage.yml) allows grouping all nested jobs under a tab + uses: ./.github/workflows/reusable_build_stage.yml with: - test_name: Compatibility check (amd64) - runner_type: style-checker + stage: Builds_1 data: ${{ needs.RunConfig.outputs.data }} - CompatibilityCheckAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml + Tests_1: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - test_name: Compatibility check (aarch64) - runner_type: style-checker + stage: Tests_1 data: ${{ 
needs.RunConfig.outputs.data }} -######################################################################################### -#################################### ORDINARY BUILDS #################################### -######################################################################################### -# TODO: never skip builds! - BuilderDebRelease: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + Builds_2: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }} + uses: ./.github/workflows/reusable_build_stage.yml with: - build_name: package_release - checkout_depth: 0 + stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} - BuilderDebReleaseCoverage: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + Tests_2: + needs: [RunConfig, Builds_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - build_name: package_release_coverage - checkout_depth: 0 + stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - BuilderDebAarch64: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + # stage for jobs that do not prohibit merge + Tests_3: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - build_name: package_aarch64 - checkout_depth: 0 + stage: Tests_3 data: ${{ needs.RunConfig.outputs.data }} - BuilderBinRelease: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_release - checkout_depth: 0 # otherwise we will have no info about contributors - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebAsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_asan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebUBsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_ubsan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebTsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_tsan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebMsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_msan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebDebug: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_debug - data: ${{ needs.RunConfig.outputs.data }} -########################################################################################## -##################################### SPECIAL BUILDS ##################################### -########################################################################################## - BuilderBinClangTidy: - needs: [RunConfig, 
BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_tidy - data: ${{ needs.RunConfig.outputs.data }} - BuilderBinDarwin: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_darwin - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_aarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinFreeBSD: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_freebsd - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinDarwinAarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_darwin_aarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinPPC64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_ppc64le - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAmd64Compat: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_amd64_compat - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAmd64Musl: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_amd64_musl - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAarch64V80Compat: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_aarch64_v80compat - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinRISCV64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_riscv64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinS390X: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_s390x - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinLoongarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_loongarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 -############################################################################################ -##################################### Docker images ####################################### -############################################################################################ - DockerServerImage: - needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Docker server image - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - DockerKeeperImage: - 
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Docker keeper image - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################ -##################################### BUILD REPORTER ####################################### -############################################################################################ - BuilderReport: + + ################################# Reports ################################# + # Reports should be run even if Builds_1/2 failed - put them separately in wf (not in Tests_1/2) + Builds_1_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebDebug - - BuilderDebMsan - - BuilderDebRelease - - BuilderDebTsan - - BuilderDebUBsan + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + needs: [RunConfig, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: + Builds_2_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinAarch64 - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - - BuilderBinFreeBSD - - BuilderBinPPC64 - - BuilderBinRISCV64 - - BuilderBinS390X - - BuilderBinLoongarch64 - - BuilderBinAmd64Compat - - BuilderBinAarch64V80Compat - - BuilderBinClangTidy - - BuilderBinAmd64Musl - - BuilderDebReleaseCoverage - - BuilderBinRelease + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} + needs: [RunConfig, Builds_2] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse special build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} + MarkReleaseReady: if: ${{ !failure() && !cancelled() }} - needs: - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - - BuilderDebRelease - - BuilderDebAarch64 - runs-on: [self-hosted, style-checker] + needs: [RunConfig, Builds_1] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Debug run: | @@ -338,7 +149,7 @@ jobs: no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} EOF - name: Not ready - # fail the job to be able restart it + # fail the job to be able to restart it if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} run: exit 1 - name: Check out repository code @@ -349,544 +160,14 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 mark_release_ready.py -############################################################################################ -#################################### INSTALL PACKAGES ###################################### -############################################################################################ - InstallPackagesTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Install packages (amd64) - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - 
run_command: | - python3 install_check.py "$CHECK_NAME" - InstallPackagesTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Install packages (arm64) - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 install_check.py "$CHECK_NAME" -############################################################################################## -########################### FUNCTIONAl STATELESS TESTS ####################################### -############################################################################################## - FunctionalStatelessTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestReleaseAnalyzerS3Replicated: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (release, old analyzer, s3, DatabaseReplicated) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestS3Debug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (debug, s3 storage) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestS3Tsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (tsan, s3 storage) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (tsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (msan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (ubsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (debug) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAsanAzure: - needs: 
[RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (azure, asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -############################ FUNCTIONAl STATEFUL TESTS ####################################### -############################################################################################## - FunctionalStatefulTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (tsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (msan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (ubsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (debug) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - # Parallel replicas - FunctionalStatefulTestDebugParallelReplicas: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (debug, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestUBsanParallelReplicas: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (ubsan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestMsanParallelReplicas: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (msan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestTsanParallelReplicas: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful 
tests (tsan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAsanParallelReplicas: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (asan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestReleaseParallelReplicas: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (release, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -########################### ClickBench ####################################################### -############################################################################################## - ClickBenchAMD64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickBench (amd64) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 clickbench.py "$CHECK_NAME" - ClickBenchAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickBench (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 clickbench.py "$CHECK_NAME" -############################################################################################## -######################################### STRESS TESTS ####################################### -############################################################################################## - StressTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (asan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestTsanAzure: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (azure, tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (msan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (ubsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (debug) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} 
-############################################################################################# -############################# INTEGRATION TESTS ############################################# -############################################################################################# - IntegrationTestsAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (asan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsAnalyzerAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (asan, old analyzer) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (release) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -##################################### AST FUZZERS ############################################ -############################################################################################## - ASTFuzzerTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (asan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (tsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestUBSan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (ubsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestMSan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (msan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (debug) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -#################################### UNIT TESTS ############################################# -############################################################################################# - UnitTestsAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (asan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsReleaseClang: - needs: 
[RunConfig, BuilderBinRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (release) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (tsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (msan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (ubsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -#################################### PERFORMANCE TESTS ###################################### -############################################################################################# - PerformanceComparisonX86: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Performance Comparison - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - PerformanceComparisonAarch: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Performance Comparison Aarch64 - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -############################ SQLLOGIC TEST ################################################### -############################################################################################## - SQLLogicTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Sqllogic test (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -##################################### SQL TEST ############################################### -############################################################################################## - SQLTest: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLTest - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -###################################### SQLANCER FUZZERS ###################################### -############################################################################################## - SQLancerTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLancer (release) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - SQLancerTestDebug: - needs: [RunConfig, BuilderDebDebug] - 
if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLancer (debug) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} FinishCheck: if: ${{ !failure() && !cancelled() }} - needs: - - MarkReleaseReady - - FunctionalStatelessTestDebug - - FunctionalStatelessTestRelease - - FunctionalStatelessTestReleaseAnalyzerS3Replicated - - FunctionalStatelessTestAarch64 - - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan - - FunctionalStatelessTestUBsan - - FunctionalStatelessTestS3Debug - - FunctionalStatelessTestS3Tsan - - FunctionalStatefulTestDebug - - FunctionalStatefulTestRelease - - FunctionalStatefulTestAarch64 - - FunctionalStatefulTestAsan - - FunctionalStatefulTestTsan - - FunctionalStatefulTestMsan - - FunctionalStatefulTestUBsan - - FunctionalStatefulTestDebugParallelReplicas - - FunctionalStatefulTestUBsanParallelReplicas - - FunctionalStatefulTestMsanParallelReplicas - - FunctionalStatefulTestTsanParallelReplicas - - FunctionalStatefulTestAsanParallelReplicas - - FunctionalStatefulTestReleaseParallelReplicas - - StressTestDebug - - StressTestAsan - - StressTestTsan - - StressTestMsan - - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsAnalyzerAsan - - IntegrationTestsTsan - - IntegrationTestsRelease - - PerformanceComparisonX86 - - PerformanceComparisonAarch - - CompatibilityCheckX86 - - CompatibilityCheckAarch64 - - ASTFuzzerTestDebug - - ASTFuzzerTestAsan - - ASTFuzzerTestTsan - - ASTFuzzerTestMSan - - ASTFuzzerTestUBSan - - UnitTestsAsan - - UnitTestsTsan - - UnitTestsMsan - - UnitTestsUBsan - - UnitTestsReleaseClang - - SQLancerTestRelease - - SQLancerTestDebug - - SQLLogicTestRelease - - SQLTest - runs-on: [self-hosted, style-checker] + needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 - with: - clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 97aa0db4cdb..d1b03198485 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -20,7 +20,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests - fetch-depth: 0 # to get version + fetch-depth: 0 # to get a version filter: tree:0 - name: Cancel PR workflow run: | @@ -60,7 +60,7 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: Style check - runner_type: style-checker + runner_type: style-checker-aarch64 run_command: | python3 style_check.py data: ${{ needs.RunConfig.outputs.data }} @@ -85,7 +85,7 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest] - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 48b4a558580..aa570c3ce2f 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -31,7 +31,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests - fetch-depth: 0 # to get version + fetch-depth: 0 # to get a version filter: tree:0 - name: Cancel Sync PR workflow run: | @@ -78,7 +78,7 @@ jobs: uses: 
./.github/workflows/reusable_test.yml with: test_name: Style check - runner_type: style-checker + runner_type: style-checker-aarch64 run_command: | python3 style_check.py data: ${{ needs.RunConfig.outputs.data }} @@ -98,13 +98,13 @@ jobs: run_command: | python3 fast_test_check.py - ################################# Main statges ################################# + ################################# Main stages ################################# # for main CI chain # Builds_1: needs: [RunConfig, StyleCheck, FastTest] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab + # using callable wf (reusable_stage.yml) allows grouping all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml with: stage: Builds_1 @@ -112,7 +112,6 @@ jobs: Tests_1: needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_1 @@ -120,7 +119,6 @@ jobs: Builds_2: needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml with: stage: Builds_2 @@ -128,7 +126,6 @@ jobs: Tests_2: needs: [RunConfig, Builds_2] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 @@ -182,7 +179,7 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 @@ -192,13 +189,6 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py - # FIXME: merge on approval does not work with MQ. 
Could be fixed by using defaul GH's automerge after some corrections in Mergeable Check status - # - name: Auto merge if approved - # if: ${{ github.event_name != 'merge_group' }} - # run: | - # cd "$GITHUB_WORKSPACE/tests/ci" - # python3 merge_pr.py --check-approved - ############################################################################################# ###################################### JEPSEN TESTS ######################################### @@ -216,5 +206,5 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse Keeper Jepsen - runner_type: style-checker + runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} From d4fb2d50e95762838b46356a79e7ba8ecd3e4c5e Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 11:36:28 +0200 Subject: [PATCH 596/651] CI: Sync, Merge check, CI gh's statuses fixes --- .github/workflows/master.yml | 21 +++++++++++---------- .github/workflows/pull_request.yml | 7 +++++-- tests/ci/ci.py | 29 +++++++++++++++++++++++++++-- tests/ci/commit_status_helper.py | 13 +++++++------ tests/ci/finish_check.py | 2 +- tests/ci/merge_pr.py | 1 - 6 files changed, 51 insertions(+), 22 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 11ec484d208..7c55098bdfd 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -27,15 +27,16 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 sync_pr.py --merge || : - - name: Python unit tests - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - echo "Testing the main ci directory" - python3 -m unittest discover -s . -p 'test_*.py' - for dir in *_lambda/; do - echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p 'test_*.py' - done +# Runs in MQ: +# - name: Python unit tests +# run: | +# cd "$GITHUB_WORKSPACE/tests/ci" +# echo "Testing the main ci directory" +# python3 -m unittest discover -s . 
-p 'test_*.py' +# for dir in *_lambda/; do +# echo "Testing $dir" +# python3 -m unittest discover -s "$dir" -p 'test_*.py' +# done - name: PrepareRunConfig id: runconfig run: | @@ -162,7 +163,7 @@ jobs: python3 mark_release_ready.py FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index aa570c3ce2f..7d22554473e 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -33,9 +33,12 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get a version filter: tree:0 - - name: Cancel Sync PR workflow + - name: Cancel previous Sync PR workflow run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run + - name: Set pending Sync status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --set-pending-status - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -177,7 +180,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 4afd3f46f9d..fc25bee354d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -17,7 +17,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames +from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames, StatusNames from ci_utils import GHActions, is_hex, normalize_string from clickhouse_helper import ( CiLogsCredentials, @@ -52,7 +52,7 @@ from git_helper import GIT_PREFIX, Git from git_helper import Runner as GitRunner from github_helper import GitHub from pr_info import PRInfo -from report import ERROR, SUCCESS, BuildResult, JobReport +from report import ERROR, SUCCESS, BuildResult, JobReport, PENDING from s3_helper import S3Helper from ci_metadata import CiMetadata from version_helper import get_version_from_repo @@ -996,6 +996,11 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: action="store_true", help="Action that cancels previous running PR workflow if PR added into the Merge Queue", ) + parser.add_argument( + "--set-pending-status", + action="store_true", + help="Action to set needed pending statuses in the beginning of CI workflow, e.g. for Sync wf", + ) parser.add_argument( "--configure", action="store_true", @@ -1930,6 +1935,19 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No ) +def _set_pending_statuses(pr_info: PRInfo) -> None: + commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) + try: + commit.create_status( + state=PENDING, + target_url="", + description="", + context=StatusNames.SYNC, + ) + except Exception as ex: + print(f"ERROR: failed to set GH commit status, ex: {ex}") + + def main() -> int: logging.basicConfig(level=logging.INFO) exit_code = 0 @@ -2265,6 +2283,13 @@ def main() -> int: else: assert False, "BUG! 
Not supported scenario" + ### SET PENDING STATUS + elif args.cancel_previous_run: + if pr_info.is_pr: + _set_pending_statuses(pr_info) + else: + assert False, "BUG! Not supported scenario" + ### print results _print_results(result, args.outfile, args.pretty) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index e1c47353743..22cc0085781 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -433,11 +433,8 @@ def set_mergeable_check( commit: Commit, description: str = "", state: StatusType = SUCCESS, - hide_url: bool = False, ) -> CommitStatus: - report_url = GITHUB_RUN_URL - if hide_url: - report_url = "" + report_url = "" return post_commit_status( commit, state, @@ -469,7 +466,6 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> def trigger_mergeable_check( commit: Commit, statuses: CommitStatuses, - hide_url: bool = False, set_if_green: bool = False, workflow_failed: bool = False, ) -> StatusType: @@ -484,9 +480,12 @@ def trigger_mergeable_check( success = [] fail = [] + pending = [] for status in required_checks: if status.state == SUCCESS: success.append(status.context) + elif status.state == PENDING: + pending.append(status.context) else: fail.append(status.context) @@ -503,6 +502,8 @@ def trigger_mergeable_check( elif workflow_failed: description = "check workflow failures" state = FAILURE + elif pending: + description = "pending: " + ", ".join(pending) description = format_description(description) if not set_if_green and state == SUCCESS: @@ -510,7 +511,7 @@ def trigger_mergeable_check( pass else: if mergeable_status is None or mergeable_status.description != description: - set_mergeable_check(commit, description, state, hide_url) + set_mergeable_check(commit, description, state) return state diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 1a7000f5353..130973ee8ff 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -67,7 +67,7 @@ def main(): if status.state == PENDING: post_commit_status( commit, - SUCCESS, + state, # map Mergeable Check status to CI Running status.target_url, "All checks finished", StatusNames.CI, diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 500de4eb718..e1c7bf94ff5 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -250,7 +250,6 @@ def main(): trigger_mergeable_check( commit, statuses, - hide_url=False, set_if_green=True, workflow_failed=(args.wf_status != "success"), ) From 22b441ed40034280d80506150f9f4969966a3f87 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 11:46:50 +0200 Subject: [PATCH 597/651] fix PR template --- .github/PULL_REQUEST_TEMPLATE.md | 64 +++++++++++++++----------------- tests/ci/ci.py | 3 +- 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 64dc9049bc2..663b464d002 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -46,42 +46,36 @@ At a minimum, the following information should be added (but add more as needed) **NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing **NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step - -#### Run these jobs only (required builds will be added automatically): -- [ ] Integration Tests -- [ ] Stateless tests -- [ ] Stateful tests -- [ ] Unit tests -- [ ] Performance tests -- [ ] All with aarch64 -- [ ] All with ASAN -- [ ] All with TSAN -- [ ] 
All with Analyzer -- [ ] All with Azure -- [ ] Add your option here - -#### Deny these jobs: -- [ ] Fast test -- [ ] Integration Tests -- [ ] Stateless tests -- [ ] Stateful tests -- [ ] Performance tests -- [ ] All with ASAN -- [ ] All with TSAN -- [ ] All with MSAN -- [ ] All with UBSAN -- [ ] All with Coverage -- [ ] All with Aarch64 - -#### Extra options: +--- +- [ ] Allow: Integration Tests +- [ ] Allow:: Stateless tests +- [ ] Allow: Stateful tests +- [ ] Allow: Unit tests +- [ ] Allow: Performance tests +- [ ] Allow: All with aarch64 +- [ ] Allow: All with ASAN +- [ ] Allow: All with TSAN +- [ ] Allow: All with Analyzer +- [ ] Allow: All with Azure +- [ ] Allow: Add your option here +--- +- [ ] Exclude: Fast test +- [ ] Exclude: Integration Tests +- [ ] Exclude: Stateless tests +- [ ] Exclude: Stateful tests +- [ ] Exclude: Performance tests +- [ ] Exclude: All with ASAN +- [ ] Exclude: All with TSAN +- [ ] Exclude: All with MSAN +- [ ] Exclude: All with UBSAN +- [ ] Exclude: All with Coverage +- [ ] Exclude: All with Aarch64 +--- - [ ] do not test (only style check) - [ ] disable merge-commit (no merge from master before tests) - [ ] disable CI cache (job reuse) - -#### Only specified batches in multi-batch jobs: -- [ ] 1 -- [ ] 2 -- [ ] 3 -- [ ] 4 - +- [ ] only batch 1 for multi-batch jobs +- [ ] only batch 2 for multi-batch jobs +- [ ] only batch 3 for multi-batch jobs +- [ ] only batch 4 for multi-batch jobs diff --git a/tests/ci/ci.py b/tests/ci/ci.py index fc25bee354d..c4e06ccd79a 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1938,6 +1938,7 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> None: def _set_pending_statuses(pr_info: PRInfo) -> None: commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) try: + print("Set SYNC status to pending") commit.create_status( state=PENDING, target_url="", description="", context=StatusNames.SYNC, ) except Exception as ex: print(f"ERROR: failed to set GH commit status, ex: {ex}") @@ -2284,7 +2285,7 @@ def main() -> int: assert False, "BUG! Not supported scenario" ### SET PENDING STATUS - elif args.cancel_previous_run: + elif args.set_pending_status: if pr_info.is_pr: _set_pending_statuses(pr_info) else: From a725112c4c7e33ae23e970b2c50f762ca2edea96 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 10:10:39 +0000 Subject: [PATCH 598/651] Fix different hashes for reading/writing from/to query cache --- src/Interpreters/executeQuery.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 0b5f68f27f6..59d012a0a0e 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1093,6 +1093,15 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl( && (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>()); QueryCache::Usage query_cache_usage = QueryCache::Usage::None; + /// If the query runs with "use_query_cache = 1", we first probe if the query cache already contains the query result (if yes: + /// return result from cache). If it doesn't, we execute the query normally and write the result into the query cache. Both steps use a + /// hash of the AST, the current database and the settings as cache key. Unfortunately, the settings are in some places internally + /// modified between steps 1 and 2 (= during query execution) - this is silly but hard to forbid. As a result, the hashes no longer + /// match and the cache is rendered ineffective. Therefore make a copy of the settings and use it for steps 1 and 2.
+ std::optional<Settings> settings_copy; + if (can_use_query_cache) + settings_copy = settings; + if (!async_insert) { /// If it is a non-internal SELECT, and passive (read) use of the query cache is enabled, and the cache knows the query, then set @@ -1101,7 +1110,7 @@ { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getCurrentDatabase(), settings, context->getUserID(), context->getCurrentRoles()); + QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1224,7 +1233,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl( && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( - ast, context->getCurrentDatabase(), settings, res.pipeline.getHeader(), + ast, context->getCurrentDatabase(), *settings_copy, res.pipeline.getHeader(), context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), From 0e758722c6da7044fcb2c8958f175a8321c056a5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 10:19:01 +0000 Subject: [PATCH 599/651] Enable 02494_query_cache_nested_query_bug for Analyzer --- .../0_stateless/02494_query_cache_nested_query_bug.reference | 2 +- tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference index 389e2621455..b261da18d51 100644 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference @@ -1,2 +1,2 @@ -2 +1 0 diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh index 8712c7c84c6..15015761295 100755 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh @@ -15,7 +15,7 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE tab (a UInt64) ENGINE=MergeTree() ORDER BY a" ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (1) (2) (3)" ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (3) (4) (5)" -SETTINGS="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS="SETTINGS use_query_cache=1, max_threads=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" # Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated # with EXPLAIN PLAN, we need to check the logs.
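An illustrative aside, not part of the patch series: the effect of the settings-copy fix above can be sanity-checked from SQL. The sketch below assumes a running server with the query cache enabled; before the fix, internal settings modifications during query execution could change the cache-key hash between the read probe (step 1) and the cache write (step 2), so repeated queries kept missing the cache on read.

```sql
-- Illustrative only: check that a repeated query is served from the query cache.
SYSTEM DROP QUERY CACHE;
SELECT count(*) FROM numbers(1000000) SETTINGS use_query_cache = 1; -- miss; result is written (step 2)
SELECT count(*) FROM numbers(1000000) SETTINGS use_query_cache = 1; -- probe (step 1) now hits the entry
SELECT count() FROM system.query_cache;                             -- expect exactly one entry
```

With the fix, both `QueryCache::Key` constructions hash the same `settings_copy` snapshot, so the probe and the write produce identical keys.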
From f1421c9e5c542ed529dd3b225fc06c696a054080 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 12:02:14 +0200 Subject: [PATCH 600/651] style fix --- .github/PULL_REQUEST_TEMPLATE.md | 11 +++++------ tests/ci/commit_status_helper.py | 4 +--- tests/ci/finish_check.py | 4 ++-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 663b464d002..f9765c1d57b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -46,9 +46,8 @@ At a minimum, the following information should be added (but add more as needed) **NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing **NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step ---- - [ ] Allow: Integration Tests -- [ ] Allow:: Stateless tests +- [ ] Allow: Stateless tests - [ ] Allow: Stateful tests - [ ] Allow: Unit tests - [ ] Allow: Performance tests @@ -74,8 +73,8 @@ At a minimum, the following information should be added (but add more as needed) - [ ] do not test (only style check) - [ ] disable merge-commit (no merge from master before tests) - [ ] disable CI cache (job reuse) -- [ ] only batch 1 for multi-batch jobs -- [ ] only batch 2 for multi-batch jobs -- [ ] only batch 3 for multi-batch jobs -- [ ] only batch 4 for multi-batch jobs +- [ ] allow: batch 1 for multi-batch jobs +- [ ] allow: batch 2 +- [ ] allow: batch 3 +- [ ] allow: batch 4, 5 and 6 diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 22cc0085781..bdbb0e80653 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -20,7 +20,6 @@ from github.Repository import Repository from ci_config import CHECK_DESCRIPTIONS, CheckDescription, StatusNames, is_required from env_helper import ( GITHUB_REPOSITORY, - GITHUB_RUN_URL, GITHUB_UPSTREAM_REPOSITORY, TEMP_PATH, ) @@ -557,13 +556,12 @@ def update_upstream_sync_status( post_commit_status( last_synced_upstream_commit, sync_status, - "", # let's won't expose any urls from cloud + "", "", StatusNames.SYNC, ) trigger_mergeable_check( last_synced_upstream_commit, get_commit_filtered_statuses(last_synced_upstream_commit), - True, set_if_green=can_set_green_mergeable_status, ) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 130973ee8ff..269d5aa3175 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -15,7 +15,7 @@ from commit_status_helper import ( ) from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import PENDING, SUCCESS +from report import PENDING from synchronizer_utils import SYNC_BRANCH_PREFIX from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY @@ -67,7 +67,7 @@ def main(): if status.state == PENDING: post_commit_status( commit, - state, # map Mergeable Check status to CI Running + state, # map Mergeable Check status to CI Running status.target_url, "All checks finished", StatusNames.CI, From 1f1c2c21b19dc3d29b60f0508b79bceb425585e7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 10:32:42 +0000 Subject: [PATCH 601/651] Fix spelling --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 6df2e426561..6eae333681d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ 
b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1617,6 +1617,8 @@ gcem generateRandom generateRandomStructure generateSeries +generateSnowflakeID +generateSnowflakeIDThreadMonotonic generateULID generateUUIDv geoDistance From 7ccb776ed93196e72485aa0219d7b281ea0f68de Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 12:39:35 +0200 Subject: [PATCH 602/651] mcheck fix --- tests/ci/commit_status_helper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index bdbb0e80653..b17c189c405 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -490,11 +490,6 @@ def trigger_mergeable_check( state: StatusType = SUCCESS - if success: - description = ", ".join(success) - else: - description = "awaiting job statuses" - if fail: description = "failed: " + ", ".join(fail) state = FAILURE @@ -503,6 +498,11 @@ def trigger_mergeable_check( state = FAILURE elif pending: description = "pending: " + ", ".join(pending) + state = PENDING + else: + # all good + description = ", ".join(success) + description = format_description(description) if not set_if_green and state == SUCCESS: From 534f996be3ec5baa544b45180fd1ff049eb2cada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 13:07:37 +0200 Subject: [PATCH 603/651] Change input_format_parquet_use_native_reader to 24.6 --- src/Core/SettingsChangesHistory.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 23f7810835c..9b5bf6b50a5 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.6", {{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, + }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, @@ -93,7 +95,6 @@ static std::map sett {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, From a6e06b27d221cfd7f5b7987c2b642487b2a80d01 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 24 May 2024 14:17:37 +0200 Subject: [PATCH 604/651] Update description for settings cross_join_min_rows_to_compress and cross_join_min_bytes_to_compress --- src/Core/SettingsChangesHistory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 23f7810835c..0521f70a91b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -89,8 +89,8 @@ static std::map sett {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, + {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. 
This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, From 3d207039584cb69d9fffe1b3ec923a31fab5f032 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 12:27:19 +0000 Subject: [PATCH 605/651] Force-enable analyzer so that tests without Analyzer can no longer fail --- tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh index 15015761295..a5339a098dc 100755 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh @@ -15,7 +15,7 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE tab (a UInt64) ENGINE=MergeTree() ORD ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (1) (2) (3)" ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (3) (4) (5)" -SETTINGS="SETTINGS use_query_cache=1, max_threads=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" # Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated # with EXPLAIN PLAN, we need to check the logs. From aada1de796144829b2a6e334764923cef6da4fff Mon Sep 17 00:00:00 2001 From: TTPO100AJIEX Date: Fri, 24 May 2024 15:36:41 +0300 Subject: [PATCH 606/651] Rename function parameters, remove unnecessary virtual --- src/Server/ServersManager/IServersManager.cpp | 8 ++-- src/Server/ServersManager/IServersManager.h | 14 +++---- .../ServersManager/InterServersManager.cpp | 20 +++++----- .../ServersManager/InterServersManager.h | 1 - .../ServersManager/ProtocolServersManager.cpp | 40 +++++++++---------- 5 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/Server/ServersManager/IServersManager.cpp b/src/Server/ServersManager/IServersManager.cpp index c903d90f766..8b1eee94303 100644 --- a/src/Server/ServersManager/IServersManager.cpp +++ b/src/Server/ServersManager/IServersManager.cpp @@ -17,8 +17,8 @@ extern const int NETWORK_ERROR; extern const int INVALID_CONFIG_PARAMETER; } -IServersManager::IServersManager(ContextMutablePtr l_global_context, Poco::Logger * l_logger) - : global_context(l_global_context), logger(l_logger) +IServersManager::IServersManager(ContextMutablePtr global_context_, Poco::Logger * logger_) + : global_context(global_context_), logger(logger_) { } @@ -107,8 +107,8 @@ void IServersManager::createServer( const Poco::Util::AbstractConfiguration & config, const std::string & listen_host, const char * port_name, - CreateServerFunc && func, - bool start_server) + bool start_server, + CreateServerFunc && func) { /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. 
if (config.getString(port_name, "").empty()) diff --git a/src/Server/ServersManager/IServersManager.h b/src/Server/ServersManager/IServersManager.h index 5218ab63554..7e1d9d50d82 100644 --- a/src/Server/ServersManager/IServersManager.h +++ b/src/Server/ServersManager/IServersManager.h @@ -19,7 +19,7 @@ namespace DB class IServersManager { public: - IServersManager(ContextMutablePtr global_context, Poco::Logger * logger); + IServersManager(ContextMutablePtr global_context_, Poco::Logger * logger_); virtual ~IServersManager() = default; bool empty() const; @@ -35,9 +35,9 @@ public: const ServerType & server_type) = 0; - virtual void startServers(); + void startServers(); - virtual void stopServers(const ServerType & server_type); + void stopServers(const ServerType & server_type); virtual size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) = 0; virtual void updateServers( @@ -58,14 +58,14 @@ protected: const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const; using CreateServerFunc = std::function; - virtual void createServer( + void createServer( const Poco::Util::AbstractConfiguration & config, const std::string & listen_host, const char * port_name, - CreateServerFunc && func, - bool start_server); + bool start_server, + CreateServerFunc && func); - virtual void stopServersForUpdate(const Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config); + void stopServersForUpdate(const Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config); Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) const; bool getListenTry(const Poco::Util::AbstractConfiguration & config) const; diff --git a/src/Server/ServersManager/InterServersManager.cpp b/src/Server/ServersManager/InterServersManager.cpp index 28491a4f4f4..4425d468248 100644 --- a/src/Server/ServersManager/InterServersManager.cpp +++ b/src/Server/ServersManager/InterServersManager.cpp @@ -71,6 +71,7 @@ void InterServersManager::createServers( config, listen_host, port_name, + /* start_server = */ false, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -92,14 +93,14 @@ void InterServersManager::createServers( false), server_pool, socket)); - }, - /* start_server = */ false); + }); constexpr auto secure_port_name = "keeper_server.tcp_port_secure"; createServer( config, listen_host, secure_port_name, + /* start_server = */ false, [&](UInt16 port) -> ProtocolServerAdapter { # if USE_SSL @@ -128,14 +129,14 @@ void InterServersManager::createServers( ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); # endif - }, - /* start_server: */ false); + }); /// HTTP control endpoints createServer( config, listen_host, /* port_name = */ "keeper_server.http_control.port", + /* start_server = */ false, [&](UInt16 port) -> ProtocolServerAdapter { auto http_context = std::make_shared(global_context); @@ -159,8 +160,7 @@ void InterServersManager::createServers( server_pool, socket, http_params)); - }, - /* start_server: */ false); + }); } #else throw Exception( @@ -264,6 +264,7 @@ void InterServersManager::createInterserverServers( config, interserver_listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -282,8 +283,7 @@ void InterServersManager::createInterserverServers( http_params, 
ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) @@ -293,6 +293,7 @@ void InterServersManager::createInterserverServers( config, interserver_listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL @@ -318,8 +319,7 @@ void InterServersManager::createInterserverServers( ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif - }, - start_servers); + }); } } } diff --git a/src/Server/ServersManager/InterServersManager.h b/src/Server/ServersManager/InterServersManager.h index 2a389e28c22..8780eae18e0 100644 --- a/src/Server/ServersManager/InterServersManager.h +++ b/src/Server/ServersManager/InterServersManager.h @@ -19,7 +19,6 @@ public: bool start_servers, const ServerType & server_type) override; - using IServersManager::stopServers; size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; void updateServers( diff --git a/src/Server/ServersManager/ProtocolServersManager.cpp b/src/Server/ServersManager/ProtocolServersManager.cpp index 17b028eddbb..af57de3ac3c 100644 --- a/src/Server/ServersManager/ProtocolServersManager.cpp +++ b/src/Server/ServersManager/ProtocolServersManager.cpp @@ -99,6 +99,7 @@ void ProtocolServersManager::createServers( config, host, port_name.c_str(), + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -110,8 +111,7 @@ void ProtocolServersManager::createServers( port_name.c_str(), description + ": " + address.toString(), std::make_unique(stack.release(), server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } } @@ -125,6 +125,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -143,8 +144,7 @@ void ProtocolServersManager::createServers( http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::HTTPS)) @@ -155,6 +155,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL @@ -180,8 +181,7 @@ void ProtocolServersManager::createServers( ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); #endif - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::TCP)) @@ -192,6 +192,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -208,8 +209,7 @@ void ProtocolServersManager::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) @@ -220,6 +220,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -236,8 +237,7 @@ void ProtocolServersManager::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) @@ -248,6 +248,7 @@ void 
ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL @@ -271,8 +272,7 @@ void ProtocolServersManager::createServers( ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::MYSQL)) @@ -282,6 +282,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -298,8 +299,7 @@ void ProtocolServersManager::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) @@ -309,6 +309,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -325,8 +326,7 @@ void ProtocolServersManager::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } #if USE_GRPC @@ -337,6 +337,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::SocketAddress server_address(listen_host, port); @@ -345,8 +346,7 @@ void ProtocolServersManager::createServers( port_name, "gRPC protocol: " + server_address.toString(), std::make_unique(server, makeSocketAddress(listen_host, port, logger))); - }, - start_servers); + }); } #endif if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) @@ -357,6 +357,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -375,8 +376,7 @@ void ProtocolServersManager::createServers( http_params, ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes)); - }, - start_servers); + }); } } }
From 2cc1b27fb5f898a8c728dda03f4dea3941c653b4 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 24 May 2024 14:41:04 +0200 Subject: [PATCH 607/651] Update docs for settings cross_join_min_rows_to_compress and cross_join_min_bytes_to_compress --- docs/en/operations/settings/settings.md | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 2b5cd11819a..b2efe5d2af4 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5468,3 +5468,15 @@ Defines how MySQL types are converted to corresponding ClickHouse types. A comma - `datetime64`: convert `DATETIME` and `TIMESTAMP` types to `DateTime64` instead of `DateTime` when precision is not `0`. - `date2Date32`: convert `DATE` to `Date32` instead of `Date`. Takes precedence over `date2String`. - `date2String`: convert `DATE` to `String` instead of `Date`. Overridden by `datetime64`. + +## cross_join_min_rows_to_compress + +Minimal count of rows to compress a block in CROSS JOIN. A zero value disables this threshold. A block is compressed when either of the two thresholds (by rows or by bytes) is reached. + +Default value: `10000000`. + +## cross_join_min_bytes_to_compress + +Minimal size of a block to compress in CROSS JOIN. A zero value disables this threshold. A block is compressed when either of the two thresholds (by rows or by bytes) is reached. + +Default value: `1GiB`.
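To make the two settings documented above concrete, a usage sketch (the table names and threshold values here are illustrative, not part of the patch):

```sql
-- Compress blocks produced by the CROSS JOIN once a block reaches
-- 1,000,000 rows or 100 MiB, whichever threshold is hit first;
-- setting either value to 0 disables that particular check.
SELECT count()
FROM t1 CROSS JOIN t2
SETTINGS cross_join_min_rows_to_compress = 1000000,
    cross_join_min_bytes_to_compress = 104857600;
```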
From 7f450cfbdd7578a0b1519f74ff7998f400793284 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 22 May 2024 17:17:43 +0000 Subject: [PATCH 608/651] Try to add alias to array join. --- src/Analyzer/ArrayJoinNode.cpp | 19 +++++ src/Analyzer/ColumnNode.cpp | 7 +- src/Analyzer/Passes/QueryAnalysisPass.cpp | 69 +++++++++++++++---- src/Analyzer/QueryTreeBuilder.cpp | 4 +- src/Analyzer/createUniqueTableAliases.cpp | 34 +++++++++ src/Parsers/ASTTablesInSelectQuery.cpp | 9 +++ src/Parsers/ASTTablesInSelectQuery.h | 4 ++ src/Parsers/ParserTablesInSelectQuery.cpp | 4 ++ .../QueryPlan/DistributedCreateLocalPlan.cpp | 8 +++ 9 files changed, 144 insertions(+), 14 deletions(-)
diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 59389d4f2a8..9c1eb9dce3e 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -24,6 +24,9 @@ void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_stat buffer << std::string(indent, ' ') << "ARRAY_JOIN id: " << format_state.getNodeId(this); buffer << ", is_left: " << is_left; + if (hasAlias()) + buffer << ", alias: " << getAlias(); + buffer << '\n' << std::string(indent + 2, ' ') << "TABLE EXPRESSION\n"; getTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); @@ -52,6 +55,8 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const auto array_join_ast = std::make_shared(); array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner; + array_join_ast->setAlias(getAlias()); + auto array_join_expressions_ast = std::make_shared(); const auto & array_join_expressions = getJoinExpressions().getNodes(); @@ -65,7 +70,21 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const else array_join_expression_ast = array_join_expression->toAST(options); + // QueryTreeNodePtr column_source; + // if (column_node) + // column_source = column_node->getColumnSourceOrNull(); + + // if (column_source && column_source->hasAlias()) + // { + // const auto & column_alias = column_node->getAlias(); + // const auto & name_or_alias = column_alias.empty() ?
column_node->getColumnName() : column_alias; + + // if (!name_or_alias.starts_with("__")) + // array_join_expression_ast->setAlias(fmt::format("{}.{}", column_source->getAlias(), name_or_alias)); + // } + // else array_join_expression_ast->setAlias(array_join_expression->getAlias()); + array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast)); } diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index 2b514a85121..f76c096a339 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -103,10 +103,15 @@ ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const if (column_source && options.fully_qualified_identifiers) { auto node_type = column_source->getNodeType(); + + // if (node_type == QueryTreeNodeType::ARRAY_JOIN && column_source->hasAlias()) + // return std::make_shared(std::string(fmt::format("{}.{}", column_source->getAlias(), column.name))); + if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::TABLE_FUNCTION || node_type == QueryTreeNodeType::QUERY || - node_type == QueryTreeNodeType::UNION) + node_type == QueryTreeNodeType::UNION || + node_type == QueryTreeNodeType::ARRAY_JOIN) { if (column_source->hasAlias()) { diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index b7c223303eb..f55f6d6c18f 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1068,10 +1068,25 @@ public: void visitImpl(QueryTreeNodePtr & node) { updateAliasesIfNeeded(node, false /*is_lambda_node*/); + + // if (auto * array_join_node = node->as()) + // { + // for (const auto & elem : array_join_node->getJoinExpressions()) + // { + // for (auto & child : elem->getChildren()) + // { + // // std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; + // visit(child); + // } + // } + // } } bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child) { + // if (parent->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) + // return false; + if (auto * lambda_node = child->as()) { updateAliasesIfNeeded(child, true /*is_lambda_node*/); @@ -1114,6 +1129,8 @@ private: if (node->getNodeType() == QueryTreeNodeType::WINDOW) return; + // std::cerr << ">>>>>>>>>> " << node->dumpTree() << std::endl; + const auto & alias = node->getAlias(); if (is_lambda_node) @@ -1526,7 +1543,7 @@ private: ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope); - ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); + ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool use_alias_table = true); ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); @@ -3794,6 +3811,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(con const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) { + // std::cerr << "tryResolveExpressionFromArrayJoinExpressions " << scope.dump() << std::endl; + const auto & array_join_node = table_expression_node->as(); const auto & array_join_column_expressions_list = array_join_node.getJoinExpressions(); const auto & array_join_column_expressions_nodes = array_join_column_expressions_list.getNodes(); @@ 
-3871,9 +3890,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) { + // std::cerr << "tryResolveIdentifierFromArrayJoin " << identifier_lookup.identifier.getFullName() << std::endl; + const auto & from_array_join_node = table_expression_node->as(); auto resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_array_join_node.getTableExpression(), scope); + // std::cerr << "tryResolveIdentifierFromArrayJoin 2 " << scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) + // << ' ' << identifier_lookup.dump() << '\n' << table_expression_node->dumpTree() << std::endl; + if (scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) || !identifier_lookup.isExpressionLookup()) return resolved_identifier; @@ -3888,8 +3912,11 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi for (const auto & array_join_column_expression : array_join_column_expressions_nodes) { auto & array_join_column_expression_typed = array_join_column_expression->as(); + // std::cerr << "========== " << identifier_lookup.identifier.getFullName() << ' ' << from_array_join_node.getAlias() << ' ' << array_join_column_expression_typed.getAlias() << std::endl; - if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName()) + const auto & parts = identifier_lookup.identifier.getParts(); + if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName() || + (parts.size() == 2 && parts.front() == from_array_join_node.getAlias() && parts.back() == array_join_column_expression_typed.getAlias())) { auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()); @@ -3911,6 +3938,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTreeNode(const Ident const QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope) { + // std::cerr << "tryResolveIdentifierFromJoinTreeNode " << identifier_lookup.identifier.getFullName() << std::endl; + auto join_tree_node_type = join_tree_node->getNodeType(); switch (join_tree_node_type) @@ -3964,6 +3993,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTree(const Identifie if (identifier_lookup.isFunctionLookup()) return {}; + // std::cerr << "tryResolveIdentifier " << identifier_lookup.identifier.getFullName() << std::endl; + /// Try to resolve identifier from table columns if (auto resolved_identifier = tryResolveIdentifierFromTableColumns(identifier_lookup, scope)) return resolved_identifier; @@ -4112,6 +4143,8 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook IdentifierResolveScope & scope, IdentifierResolveSettings identifier_resolve_settings) { + // std::cerr << "tryResolveIdentifier " << identifier_lookup.identifier.getFullName() << std::endl; + auto it = scope.identifier_lookup_to_resolve_state.find(identifier_lookup); if (it != scope.identifier_lookup_to_resolve_state.end()) { @@ -6284,7 +6317,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. 
*/ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) +ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool use_alias_table) { checkStackSize(); @@ -6334,7 +6367,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use * alias table because in alias table subquery could be evaluated as scalar. */ - bool use_alias_table = true; + //bool use_alias_table = true; if (is_duplicated_alias || (allow_table_expression && isSubqueryNodeType(node->getNodeType()))) use_alias_table = false; @@ -7569,22 +7602,33 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (auto & array_join_expression : array_join_nodes) { auto array_join_expression_alias = array_join_expression->getAlias(); - if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias)) - throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, - "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", - array_join_expression->formatASTForErrorMessage(), - array_join_expression_alias, - scope.scope_node->formatASTForErrorMessage()); + // if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias)) + // throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, + // "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", + // array_join_expression->formatASTForErrorMessage(), + // array_join_expression_alias, + // scope.scope_node->formatASTForErrorMessage()); /// Add array join expression into scope - expressions_visitor.visit(array_join_expression); + + for (const auto & elem : array_join_nodes) + { + for (auto & child : elem->getChildren()) + { + //std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; + expressions_visitor.visit(child); + //visit(child); + } + } + + //expressions_visitor.visit(array_join_expression); std::string identifier_full_name; if (auto * identifier_node = array_join_expression->as()) identifier_full_name = identifier_node->getIdentifier().getFullName(); - resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, false); auto process_array_join_expression = [&](QueryTreeNodePtr & expression) { @@ -8456,6 +8500,7 @@ QueryAnalysisPass::QueryAnalysisPass(bool only_analyze_) : only_analyze(only_ana void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { + // std::cerr << ".... 
qap\n" << query_tree_node->dumpTree() << std::endl; QueryAnalyzer analyzer(only_analyze); analyzer.resolve(query_tree_node, table_expression, context); createUniqueTableAliases(query_tree_node, table_expression, context); diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 6a5db4bc1de..1d4810296b4 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -957,6 +957,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list, context); auto array_join_node = std::make_shared(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join); + array_join_node->setAlias(array_join_expression.tryGetAlias()); /** Original AST is not set because it will contain only array join part and does * not include left table expression. @@ -1045,7 +1046,8 @@ ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context) { QueryTreeBuilder builder(std::move(query), context); - return builder.getQueryTreeNode(); + auto qt = builder.getQueryTreeNode(); + return qt; } } diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp index 8f850fe8dec..30b8c0a433b 100644 --- a/src/Analyzer/createUniqueTableAliases.cpp +++ b/src/Analyzer/createUniqueTableAliases.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include #include #include @@ -58,6 +60,38 @@ public: alias = fmt::format("__table{}", ++next_id); node->setAlias(alias); } + + if (auto * array_join = node->as()) + { + //size_t counter = 0; + for (auto & column : array_join->getJoinExpressions()) + { + if (auto * column_node = column->as()) + { + if (!column_node->hasAlias()) + column_node->setAlias(column_node->getColumnName()); + } + } + } + + // if (auto * array_join = node->as()) + // { + // for (auto & column : array_join->getJoinExpressions()) + // { + // if (auto * column_node = column->as()) + // { + // const auto & column_alias = column_node->getAlias(); + // const auto & name_or_alias = column_alias.empty() ? column_node->getColumnName() : column_alias; + + // if (!name_or_alias.starts_with("__")) + // { + + // column_node->setAlias(fmt::format("{}.{}", alias, name_or_alias)); + // } + // } + // } + // } + break; } default: diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index e782bad797e..2f3e9207f81 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -247,6 +247,12 @@ void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & sta formatImplAfterTable(settings, state, frame); } +static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) +{ + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); + settings.writeIdentifier(name); + settings.ostr << (settings.hilite ? IAST::hilite_none : ""); +} void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { @@ -258,6 +264,9 @@ void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & sta << indent_str << (kind == Kind::Left ? "LEFT " : "") << "ARRAY JOIN" << (settings.hilite ? hilite_none : ""); + if (!alias.empty()) + writeAlias(alias, settings); + settings.one_line ? 
expression_list->formatImpl(settings, state, frame) : expression_list->as().formatImplMultiline(settings, state, frame); diff --git a/src/Parsers/ASTTablesInSelectQuery.h b/src/Parsers/ASTTablesInSelectQuery.h index f3f329ca2b6..4619b22f022 100644 --- a/src/Parsers/ASTTablesInSelectQuery.h +++ b/src/Parsers/ASTTablesInSelectQuery.h @@ -95,6 +95,10 @@ struct ASTArrayJoin : public IAST /// List of array or nested names to JOIN, possible with aliases. ASTPtr expression_list; + String alias; + + String tryGetAlias() const override { return alias; } + void setAlias(const String & to) override { alias = to; } using IAST::IAST; String getID(char) const override { return "ArrayJoin"; } diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index b4d48ae67e9..b2a801c8943 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -98,6 +98,10 @@ bool ParserArrayJoin::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!has_array_join) return false; + ASTPtr alias_node; + if (ParserAlias(false).parse(pos, alias_node, expected)) + tryGetIdentifierNameInto(alias_node, res->alias); + if (!ParserExpressionList(false).parse(pos, res->expression_list, expected)) return false; diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index d4545482477..aef3c03255e 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -2,6 +2,7 @@ #include #include +#include "Parsers/queryToString.h" #include #include #include @@ -68,12 +69,19 @@ std::unique_ptr createLocalPlan( if (context->getSettingsRef().allow_experimental_analyzer) { + // std::cerr << query_ast->dumpTree() << std::endl; + // std::cerr << queryToString(query_ast) << std::endl; + /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); + // std::cerr << interpreter.getQueryTree()->dumpTree() << std::endl; query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + WriteBufferFromOwnString buf; + query_plan->explainPlan(buf, {.header=true, .actions=true}); + // std::cerr << buf.str() << std::endl; } else { From b4581286f74bcdfe199c3b8967e237ae3375cd88 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 16:34:11 +0000 Subject: [PATCH 609/651] Properly resolve array join columns. 
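Together with patch 608 above, this patch changes how identifiers that collide with an ARRAY JOIN alias are resolved, as the updated `02374_analyzer_array_join` expectations below show. A sketch of the user-visible effect (`test_table` is the table that test creates; its definition is not shown in this excerpt):

```sql
-- Used to be rejected with MULTIPLE_EXPRESSIONS_FOR_ALIAS (code 179);
-- after this patch it resolves, and the exact winner between the
-- projection alias and the ARRAY JOIN alias is refined again in
-- patch 614 further down.
SELECT 1 AS value FROM test_table ARRAY JOIN [1, 2, 3] AS value;
```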
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 60 ++++++++++++++++--- .../02374_analyzer_array_join.reference | 16 ++++- .../0_stateless/02374_analyzer_array_join.sql | 4 +- .../02521_analyzer_array_join_crash.reference | 9 ++- .../02521_analyzer_array_join_crash.sql | 6 +- 5 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f55f6d6c18f..6bce3dff49d 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -607,6 +607,8 @@ struct ScopeAliases std::unordered_set nodes_with_duplicated_aliases; std::vector cloned_nodes_with_duplicated_aliases; + std::unordered_set array_join_aliases; + std::unordered_map & getAliasMap(IdentifierLookupContext lookup_context) { switch (lookup_context) @@ -2875,7 +2877,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(cons bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope) { - return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr; + return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr || scope.aliases.array_join_aliases.contains(identifier_lookup.identifier.front()); } /** Resolve identifier from scope aliases. @@ -2924,6 +2926,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier IdentifierResolveSettings identifier_resolve_settings) { const auto & identifier_bind_part = identifier_lookup.identifier.front(); + // std::cerr << "tryResolveIdentifierFromAliases " << identifier_lookup.dump() << std::endl; auto * it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); if (it == nullptr) @@ -2952,6 +2955,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } auto node_type = alias_node->getNodeType(); + // std::cerr << "tryResolveIdentifierFromAliases 1.5 \n" << alias_node->dumpTree() << std::endl; /// Resolve expression if necessary if (node_type == QueryTreeNodeType::IDENTIFIER) @@ -2960,6 +2964,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier auto & alias_identifier_node = alias_node->as(); auto identifier = alias_identifier_node.getIdentifier(); + // std::cerr << "tryResolveIdentifierFromAliases 2 " << identifier.getFullName() << std::endl; auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); if (!lookup_result.resolved_identifier) { @@ -3136,6 +3141,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage( size_t identifier_column_qualifier_parts, bool can_be_not_found) { + // std::cerr << "tryResolveIdentifierFromStorage " << identifier.getFullName() << std::endl; auto identifier_without_column_qualifier = identifier; identifier_without_column_qualifier.popFirst(identifier_column_qualifier_parts); @@ -3278,6 +3284,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage( { auto qualified_identifier_with_removed_part = qualified_identifier; qualified_identifier_with_removed_part.popFirst(); + // std::cerr << "tryResolveIdentifierFromStorage qualified_identifier_with_removed_part" << qualified_identifier_with_removed_part.getFullName() << std::endl; if (qualified_identifier_with_removed_part.empty()) break; @@ -3896,7 +3903,7 @@ QueryTreeNodePtr 
QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi auto resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_array_join_node.getTableExpression(), scope); // std::cerr << "tryResolveIdentifierFromArrayJoin 2 " << scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) - // << ' ' << identifier_lookup.dump() << '\n' << table_expression_node->dumpTree() << std::endl; + // << ' ' << identifier_lookup.dump() << ' ' << (resolved_identifier ? resolved_identifier->dumpTree() : "not resolved ") << std::endl; if (scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) || !identifier_lookup.isExpressionLookup()) return resolved_identifier; @@ -3914,14 +3921,48 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi auto & array_join_column_expression_typed = array_join_column_expression->as(); // std::cerr << "========== " << identifier_lookup.identifier.getFullName() << ' ' << from_array_join_node.getAlias() << ' ' << array_join_column_expression_typed.getAlias() << std::endl; - const auto & parts = identifier_lookup.identifier.getParts(); - if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName() || - (parts.size() == 2 && parts.front() == from_array_join_node.getAlias() && parts.back() == array_join_column_expression_typed.getAlias())) + IdentifierView identifier_view(identifier_lookup.identifier); + + if (identifier_view.isCompound() && from_array_join_node.hasAlias() && identifier_view.front() == from_array_join_node.getAlias()) + identifier_view.popFirst(); + + const auto & alias_or_name = array_join_column_expression_typed.hasAlias() + ? array_join_column_expression_typed.getAlias() + : array_join_column_expression_typed.getColumnName(); + + if (identifier_view.front() == alias_or_name) + identifier_view.popFirst(); + else if (identifier_view.getFullName() == alias_or_name) + identifier_view.popFirst(identifier_view.getPartsSize()); /// Clear + else + continue; + + if (identifier_view.empty()) { auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()); return array_join_column; } + + auto compound_expr = tryResolveIdentifierFromCompoundExpression( + identifier_lookup.identifier, + identifier_lookup.identifier.getPartsSize() - identifier_view.getPartsSize() /*identifier_bind_size*/, + array_join_column_expression, + {} /* compound_expression_source */, + scope, + true /* can_be_not_found */); + + if (compound_expr) + return compound_expr; + + // const auto & parts = identifier_lookup.identifier.getParts(); + // if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName() || + // (parts.size() == 2 && parts.front() == from_array_join_node.getAlias() && parts.back() == array_join_column_expression_typed.getAlias())) + // { + // auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), + // array_join_column_expression_typed.getColumnSource()); + // return array_join_column; + // } } if (!resolved_identifier) @@ -3993,7 +4034,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTree(const Identifie if (identifier_lookup.isFunctionLookup()) return {}; - // std::cerr << "tryResolveIdentifier " << identifier_lookup.identifier.getFullName() << std::endl; + // std::cerr << "tryResolveIdentifierFromJoinTree " << identifier_lookup.identifier.getFullName() << std::endl; /// Try to 
resolve identifier from table columns if (auto resolved_identifier = tryResolveIdentifierFromTableColumns(identifier_lookup, scope)) @@ -7613,15 +7654,18 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (const auto & elem : array_join_nodes) { + if (elem->hasAlias()) + scope.aliases.array_join_aliases.insert(elem->getAlias()); for (auto & child : elem->getChildren()) { //std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; - expressions_visitor.visit(child); + if (child) + expressions_visitor.visit(child); //visit(child); } } - //expressions_visitor.visit(array_join_expression); + // expressions_visitor.visit(array_join_expression); std::string identifier_full_name; diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.reference b/tests/queries/0_stateless/02374_analyzer_array_join.reference index 6dd384c7d9c..44f3e5a95e9 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.reference +++ b/tests/queries/0_stateless/02374_analyzer_array_join.reference @@ -45,7 +45,13 @@ SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS val 0 Value [1,2,3] 1 0 Value [1,2,3] 2 0 Value [1,2,3] 3 -SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; +1 +2 +3 +1 +2 +3 SELECT 'ARRAY JOIN with column'; ARRAY JOIN with column SELECT id, value, test_table.value_array FROM test_table ARRAY JOIN value_array; @@ -84,7 +90,13 @@ SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY J 0 Value [4,5,6] SELECT '--'; -- -SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; +1 +2 +3 +4 +5 +6 SELECT '--'; -- SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.sql b/tests/queries/0_stateless/02374_analyzer_array_join.sql index bc4bb6616c1..dfd3b755aff 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.sql +++ b/tests/queries/0_stateless/02374_analyzer_array_join.sql @@ -33,7 +33,7 @@ SELECT '--'; SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS value_1 ARRAY JOIN value_1 AS value_2; -SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; SELECT 'ARRAY JOIN with column'; @@ -53,7 +53,7 @@ SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY J SELECT '--'; -SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; SELECT '--'; diff --git a/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference b/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference index 5e7728e0590..426cfe35e73 100644 --- a/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference +++ b/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference @@ -1,11 +1,10 @@ -- { echoOn } -SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -0 [1,2,3] [1,2,3] +SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -- { serverError UNKNOWN_IDENTIFIER 
} SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element ARRAY JOIN value_element AS value; 0 [1,2,3] 1 0 [1,2,3] 2 0 [1,2,3] 3 -SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element, arrayMap(x -> value_element, ['']) AS value; -1048577 [1048577] -SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem, arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError 44 } +SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element ARRAY JOIN arrayMap(x -> value_element, ['']) AS value; +1048577 1048577 +SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem ARRAY JOIN arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql b/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql index 53606e01ab7..7842d47d757 100644 --- a/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql +++ b/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql @@ -11,13 +11,13 @@ INSERT INTO test_table VALUES (0, 'Value'); -- { echoOn } -SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; +SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -- { serverError UNKNOWN_IDENTIFIER } SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element ARRAY JOIN value_element AS value; -SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element, arrayMap(x -> value_element, ['']) AS value; +SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element ARRAY JOIN arrayMap(x -> value_element, ['']) AS value; -SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem, arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError 44 } +SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem ARRAY JOIN arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError ILLEGAL_COLUMN } -- { echoOff } From 317941f06af836d719e1360b04616970271ecc12 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 17:01:46 +0000 Subject: [PATCH 610/651] Add a test. 
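The new test below pushes ARRAY JOIN aliases through `remote()` so the distributed plan has to round-trip them. For reference, the two flavors it exercises differ as follows (a local sketch over the same `arrays_test` table the test creates):

```sql
-- With an alias, the source array stays addressable next to its element:
SELECT s, arr, a FROM arrays_test ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a;

-- Without an alias, `arr` itself refers to the unrolled element:
SELECT s, arr FROM arrays_test ARRAY JOIN arr WHERE arr < 3 ORDER BY arr;
```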
--- .../03156_analyzer_array_join_distributed.reference | 12 ++++++++++++ .../03156_analyzer_array_join_distributed.sql | 10 ++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference create mode 100644 tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference new file mode 100644 index 00000000000..b5b2aec9c12 --- /dev/null +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference @@ -0,0 +1,12 @@ +Hello [1,2] 1 +Hello [1,2] 2 +Hello [1,2] 1 +Hello [1,2] 1 +Hello [1,2] 2 +Hello [1,2] 2 +Hello 1 +Hello 2 +Hello 1 +Hello 1 +Hello 2 +Hello 2 diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql new file mode 100644 index 00000000000..f605a369822 --- /dev/null +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql @@ -0,0 +1,10 @@ +CREATE TABLE arrays_test (s String, arr Array(UInt8)) ENGINE = MergeTree() ORDER BY (s); + +INSERT INTO arrays_test VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []); + +SELECT s, arr, a FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a; +SELECT s, arr, a FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a; + + +SELECT s, arr FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; +SELECT s, arr FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; From bee3c50ecd4a41e64d29812b5607927c12dba111 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 17:23:02 +0000 Subject: [PATCH 611/651] Try not to add alias to array join. --- src/Analyzer/ArrayJoinNode.cpp | 2 +- src/Analyzer/ColumnNode.cpp | 4 ++-- src/Analyzer/QueryTreeBuilder.cpp | 2 +- src/Parsers/ASTTablesInSelectQuery.cpp | 16 ++++++++-------- src/Parsers/ASTTablesInSelectQuery.h | 6 +++--- src/Parsers/ParserTablesInSelectQuery.cpp | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 9c1eb9dce3e..37c198f8472 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -55,7 +55,7 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const auto array_join_ast = std::make_shared(); array_join_ast->kind = is_left ? 
ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner; - array_join_ast->setAlias(getAlias()); + // array_join_ast->setAlias(getAlias()); auto array_join_expressions_ast = std::make_shared(); const auto & array_join_expressions = getJoinExpressions().getNodes(); diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index f76c096a339..d12eac68ab4 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -110,8 +110,8 @@ ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::TABLE_FUNCTION || node_type == QueryTreeNodeType::QUERY || - node_type == QueryTreeNodeType::UNION || - node_type == QueryTreeNodeType::ARRAY_JOIN) + node_type == QueryTreeNodeType::UNION)// || + //node_type == QueryTreeNodeType::ARRAY_JOIN) { if (column_source->hasAlias()) { diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 1d4810296b4..02d742f5e49 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -957,7 +957,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list, context); auto array_join_node = std::make_shared(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join); - array_join_node->setAlias(array_join_expression.tryGetAlias()); + // array_join_node->setAlias(array_join_expression.tryGetAlias()); /** Original AST is not set because it will contain only array join part and does * not include left table expression. diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 2f3e9207f81..b4058a0950d 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -247,12 +247,12 @@ void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & sta formatImplAfterTable(settings, state, frame); } -static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) -{ - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); - settings.writeIdentifier(name); - settings.ostr << (settings.hilite ? IAST::hilite_none : ""); -} +// static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) +// { +// settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); +// settings.writeIdentifier(name); +// settings.ostr << (settings.hilite ? IAST::hilite_none : ""); +// } void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { @@ -264,8 +264,8 @@ void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & sta << indent_str << (kind == Kind::Left ? "LEFT " : "") << "ARRAY JOIN" << (settings.hilite ? hilite_none : ""); - if (!alias.empty()) - writeAlias(alias, settings); + // if (!alias.empty()) + // writeAlias(alias, settings); settings.one_line ? expression_list->formatImpl(settings, state, frame) diff --git a/src/Parsers/ASTTablesInSelectQuery.h b/src/Parsers/ASTTablesInSelectQuery.h index 4619b22f022..212436b0d9e 100644 --- a/src/Parsers/ASTTablesInSelectQuery.h +++ b/src/Parsers/ASTTablesInSelectQuery.h @@ -95,10 +95,10 @@ struct ASTArrayJoin : public IAST /// List of array or nested names to JOIN, possible with aliases. 
ASTPtr expression_list; - String alias; + // String alias; - String tryGetAlias() const override { return alias; } - void setAlias(const String & to) override { alias = to; } + // String tryGetAlias() const override { return alias; } + // void setAlias(const String & to) override { alias = to; } using IAST::IAST; String getID(char) const override { return "ArrayJoin"; } diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index b2a801c8943..c96b6c1584d 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -98,9 +98,9 @@ bool ParserArrayJoin::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!has_array_join) return false; - ASTPtr alias_node; - if (ParserAlias(false).parse(pos, alias_node, expected)) - tryGetIdentifierNameInto(alias_node, res->alias); + // ASTPtr alias_node; + // if (ParserAlias(false).parse(pos, alias_node, expected)) + // tryGetIdentifierNameInto(alias_node, res->alias); if (!ParserExpressionList(false).parse(pos, res->expression_list, expected)) return false; From a19472ddd58d121c8cda910dd7690fa37fb66065 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 17:53:17 +0000 Subject: [PATCH 612/651] Connect code. --- src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index aef3c03255e..ad94dd2c173 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -78,9 +78,9 @@ std::unique_ptr createLocalPlan( new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); // std::cerr << interpreter.getQueryTree()->dumpTree() << std::endl; - query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); - WriteBufferFromOwnString buf; - query_plan->explainPlan(buf, {.header=true, .actions=true}); + // query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + // WriteBufferFromOwnString buf; + // query_plan->explainPlan(buf, {.header=true, .actions=true}); // std::cerr << buf.str() << std::endl; } else From 1e5872cb4ea8237d24528d2595a6708a36204a00 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 11:02:31 +0200 Subject: [PATCH 613/651] Update DistributedCreateLocalPlan.cpp --- src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index ad94dd2c173..e4d908e2af0 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -78,7 +78,7 @@ std::unique_ptr createLocalPlan( new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); // std::cerr << interpreter.getQueryTree()->dumpTree() << std::endl; - // query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); // WriteBufferFromOwnString buf; // query_plan->explainPlan(buf, {.header=true, .actions=true}); // std::cerr << buf.str() << std::endl; From 
634f7c35e8348cbf0c77de729bde131d34ca6336 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 12:43:40 +0000 Subject: [PATCH 614/651] Better. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 40 +++++++++++-------- .../02374_analyzer_array_join.reference | 24 ++++++----- .../0_stateless/02374_analyzer_array_join.sql | 3 ++ 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 6bce3dff49d..871c3842de0 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1545,7 +1545,7 @@ private: ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope); - ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool use_alias_table = true); + ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias = false); ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); @@ -3919,6 +3919,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi for (const auto & array_join_column_expression : array_join_column_expressions_nodes) { auto & array_join_column_expression_typed = array_join_column_expression->as(); + // std::cerr << "========== " << array_join_column_expression->dumpTree() << std::endl; // std::cerr << "========== " << identifier_lookup.identifier.getFullName() << ' ' << from_array_join_node.getAlias() << ' ' << array_join_column_expression_typed.getAlias() << std::endl; IdentifierView identifier_view(identifier_lookup.identifier); @@ -6358,10 +6359,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. */ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool use_alias_table) +ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) { checkStackSize(); + // std::cerr << "resolveExpressionNode " << ignore_alias << "\n" << node->dumpTree() << std::endl; + auto resolved_expression_it = resolved_expressions.find(node); if (resolved_expression_it != resolved_expressions.end()) { @@ -6378,6 +6381,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id evaluateScalarSubqueryIfNeeded(node, subquery_scope); } + // std::cerr << "resolveExpressionNode taken from cache \n" << node->dumpTree() << "\n PN " << (resolved_expression_it->second.empty() ? 
"" : resolved_expression_it->second.front()) << std::endl; return resolved_expression_it->second; } @@ -6388,7 +6392,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { auto projection_name_it = node_to_projection_name.find(node); if (projection_name_it != node_to_projection_name.end()) + { + // std::cerr << "resolveExpressionNode taken projection name from map : " << projection_name_it->second << " for \n" << node->dumpTree() << std::endl; result_projection_names.push_back(projection_name_it->second); + } } else { @@ -6408,7 +6415,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use * alias table because in alias table subquery could be evaluated as scalar. */ - //bool use_alias_table = true; + bool use_alias_table = !ignore_alias; if (is_duplicated_alias || (allow_table_expression && isSubqueryNodeType(node->getNodeType()))) use_alias_table = false; @@ -6708,7 +6715,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id if (is_duplicated_alias) scope.non_cached_identifier_lookups_during_expression_resolve.erase({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); - resolved_expressions.emplace(node, result_projection_names); + if (!ignore_alias) + resolved_expressions.emplace(node, result_projection_names); scope.popExpressionNode(); bool expression_was_root = scope.expressions_in_resolve_process_stack.empty(); @@ -7672,7 +7680,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif if (auto * identifier_node = array_join_expression->as()) identifier_full_name = identifier_node->getIdentifier().getFullName(); - resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, false); + resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, true); auto process_array_join_expression = [&](QueryTreeNodePtr & expression) { @@ -7749,17 +7757,17 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif * with type after ARRAY JOIN. 
*/ array_join_nodes = std::move(array_join_column_expressions); - for (auto & array_join_column_expression : array_join_nodes) - { - auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); - if (it != scope.aliases.alias_name_to_expression_node->end()) - { - auto & array_join_column_expression_typed = array_join_column_expression->as(); - auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), - array_join_column_expression_typed.getColumnSource()); - it->second = std::move(array_join_column); - } - } + // for (auto & array_join_column_expression : array_join_nodes) + // { + // auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); + // if (it != scope.aliases.alias_name_to_expression_node->end()) + // { + // auto & array_join_column_expression_typed = array_join_column_expression->as(); + // auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), + // array_join_column_expression_typed.getColumnSource()); + // it->second = std::move(array_join_column); + // } + // } } void QueryAnalyzer::checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope) diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.reference b/tests/queries/0_stateless/02374_analyzer_array_join.reference index 44f3e5a95e9..ad7750228d6 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.reference +++ b/tests/queries/0_stateless/02374_analyzer_array_join.reference @@ -47,11 +47,11 @@ SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS val 0 Value [1,2,3] 3 SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; 1 -2 -3 1 -2 -3 +1 +1 +1 +1 SELECT 'ARRAY JOIN with column'; ARRAY JOIN with column SELECT id, value, test_table.value_array FROM test_table ARRAY JOIN value_array; @@ -91,12 +91,12 @@ SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY J SELECT '--'; -- SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -1 -2 -3 -4 -5 -6 +0 +0 +0 +0 +0 +0 SELECT '--'; -- SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; @@ -132,3 +132,7 @@ WHERE NOT ignore(elem) GROUP BY sum(ignore(ignore(ignore(1., 1, 36, 8, 8), ignore(52, 37, 37, '03147_parquet_memory_tracking.parquet', 37, 37, toUInt256(37), 37, 37, toNullable(37), 37, 37), 1., 1, 36, 8, 8), emptyArrayToSingle(arrayMap(x -> toString(x), arrayMap(x -> nullIf(x, 2), arrayJoin([[1]])))))) IGNORE NULLS, modulo(toLowCardinality('03147_parquet_memory_tracking.parquet'), number, toLowCardinality(3)); -- { serverError UNKNOWN_IDENTIFIER } +[1,2] 1 +[1,2] 2 +1 +2 diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.sql b/tests/queries/0_stateless/02374_analyzer_array_join.sql index dfd3b755aff..8c26df1806e 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.sql +++ b/tests/queries/0_stateless/02374_analyzer_array_join.sql @@ -80,3 +80,6 @@ GROUP BY -- { echoOff } DROP TABLE test_table; + +select [1, 2] as arr, x from system.one array join arr as x; +select x + 1 as x from (select [number] as arr from numbers(2)) as s array join arr as x; From 9794a193cfb88d7a49b12b9a60986884bf3ebfda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 
15:05:49 +0200 Subject: [PATCH 615/651] Rename aggregate_function_group_array_has_limit_size --- .../AggregateFunctionGroupArray.cpp | 11 ++++++----- src/Core/ServerSettings.h | 3 ++- src/Core/SettingsEnums.cpp | 5 +++++ src/Core/SettingsEnums.h | 8 ++++++++ .../configs/group_array_max_element_size.xml | 2 +- .../integration/test_group_array_element_size/test.py | 8 ++++---- 6 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index d4fb7afcb78..c21b1d376d9 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -753,10 +753,11 @@ size_t getMaxArraySize() return 0xFFFFFF; } -bool hasLimitArraySize() +bool discardOnLimitReached() { if (auto context = Context::getGlobalContextInstance()) - return context->getServerSettings().aggregate_function_group_array_has_limit_size; + return context->getServerSettings().aggregate_function_group_array_action_when_limit_is_reached + == GroupArrayActionWhenLimitReached::DISCARD; return false; } @@ -767,7 +768,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( { assertUnary(name, argument_types); - bool limit_size = hasLimitArraySize(); + bool has_limit = discardOnLimitReached(); UInt64 max_elems = getMaxArraySize(); if (parameters.empty()) @@ -784,14 +785,14 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( (type == Field::Types::UInt64 && parameters[0].get() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - limit_size = true; + has_limit = true; max_elems = parameters[0].get(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of parameters for aggregate function {}, should be 0 or 1", name); - if (!limit_size) + if (!has_limit) { if (Tlast) throw Exception(ErrorCodes::BAD_ARGUMENTS, "groupArrayLast make sense only with max_elems (groupArrayLast(max_elems)())"); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index ea0b155b22d..45f235116ab 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -3,6 +3,7 @@ #include #include +#include namespace Poco::Util @@ -51,7 +52,7 @@ namespace DB M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \ M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \ - M(Bool, aggregate_function_group_array_has_limit_size, false, "When the max array element size is exceeded, a `Too large array size` exception will be thrown by default. When set to true, no exception will be thrown, and the excess elements will be discarded.", 0) \ + M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when the max array element size is exceeded in groupArray: `throw` an exception, or `discard` the extra values", 0) \ M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \ M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio.
Allows to lower max memory on low-memory systems.", 0) \ M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 0caf6e8d609..05985316566 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -229,4 +229,9 @@ IMPLEMENT_SETTING_ENUM(SQLSecurityType, ErrorCodes::BAD_ARGUMENTS, {{"DEFINER", SQLSecurityType::DEFINER}, {"INVOKER", SQLSecurityType::INVOKER}, {"NONE", SQLSecurityType::NONE}}) + +IMPLEMENT_SETTING_ENUM( + GroupArrayActionWhenLimitReached, + ErrorCodes::BAD_ARGUMENTS, + {{"throw", GroupArrayActionWhenLimitReached::THROW}, {"discard", GroupArrayActionWhenLimitReached::DISCARD}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index ab163ba96a3..575cd8700c8 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -370,4 +370,12 @@ DECLARE_SETTING_ENUM(SchemaInferenceMode) DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateTimeOverflowBehavior) DECLARE_SETTING_ENUM(SQLSecurityType) + +enum class GroupArrayActionWhenLimitReached : uint8_t +{ + THROW, + DISCARD +}; +DECLARE_SETTING_ENUM(GroupArrayActionWhenLimitReached) + } diff --git a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml index 80409d3e18b..32d5d131a44 100644 --- a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml +++ b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml @@ -1,4 +1,4 @@ 10 - false + throw diff --git a/tests/integration/test_group_array_element_size/test.py b/tests/integration/test_group_array_element_size/test.py index 1eb7647d734..90b2712ffbf 100644 --- a/tests/integration/test_group_array_element_size/test.py +++ b/tests/integration/test_group_array_element_size/test.py @@ -80,8 +80,8 @@ def test_limit_size(started_cluster): node2.replace_in_config( "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", - "false", - "true", + "throw", + "discard", ) node2.restart_clickhouse() @@ -91,8 +91,8 @@ def test_limit_size(started_cluster): node2.replace_in_config( "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", - "true", - "false", + "discard", + "throw", ) node2.restart_clickhouse() From 16fb2fc5616ae462c1f658f9765c82d935b456e4 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 May 2024 13:13:19 +0000 Subject: [PATCH 616/651] Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts --- ...9_dynamic_aggregating_merge_tree.reference | 32 +++++++++++++++ .../03039_dynamic_aggregating_merge_tree.sh | 40 +++++++++++++++++++ ...39_dynamic_collapsing_merge_tree.reference | 20 ++++++++++ .../03039_dynamic_collapsing_merge_tree.sh | 38 ++++++++++++++++++ ...039_dynamic_replacing_merge_tree.reference | 20 ++++++++++ .../03039_dynamic_replacing_merge_tree.sh | 39 ++++++++++++++++++ ...03039_dynamic_summing_merge_tree.reference | 32 +++++++++++++++ .../03039_dynamic_summing_merge_tree.sh | 40 +++++++++++++++++++ ..._versioned_collapsing_merge_tree.reference | 20 ++++++++++ ...dynamic_versioned_collapsing_merge_tree.sh | 38 ++++++++++++++++++ 10 files changed, 319 insertions(+) create mode 100644 tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference create mode 100755 
tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference new file mode 100644 index 00000000000..3c186fcc935 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference @@ -0,0 +1,32 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh new file mode 100755 index 00000000000..c433d409c7c --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sum AggregateFunction(sum, UInt64), d Dynamic) engine=AggregatingMergeTree() order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference new file mode 100644 index 00000000000..fc293cc2ec8 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh new file mode 100755 index 00000000000..881c9ec64cc --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sign Int8, d Dynamic) engine=CollapsingMergeTree(sign) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference new file mode 100644 index 00000000000..132b9df6b26 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree compact + vertical merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree wide + vertical merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String diff --git a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh new file mode 100755 index 00000000000..fc9039ac98c --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=ReplacingMergeTree order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference new file mode 100644 index 00000000000..3c186fcc935 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference @@ -0,0 +1,32 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh new file mode 100755 index 00000000000..f9da70e95ca --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sum UInt64, d Dynamic) engine=SummingMergeTree(sum) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference new file mode 100644 index 00000000000..cabb0fdefab --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh new file mode 100755 index 00000000000..ca313307a6d --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sign Int8, version UInt8, d Dynamic) engine=VersionedCollapsingMergeTree(sign, version) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" From 09750cb83b0ed72c5527aaf6ab9211203aa6b7f8 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 May 2024 13:14:02 +0000 Subject: [PATCH 617/651] Delete old tests --- ...9_dynamic_all_merge_algorithms_1.reference | 88 ------------------- .../03039_dynamic_all_merge_algorithms_1.sh | 65 -------------- ...9_dynamic_all_merge_algorithms_2.reference | 44 ---------- .../03039_dynamic_all_merge_algorithms_2.sh | 50 ----------- 4 files changed, 247 deletions(-) delete mode 100644 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference delete mode 100755 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh delete mode 100644 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference delete mode 100755 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference deleted file mode 100644 index 6c69b81c183..00000000000 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference +++ /dev/null @@ -1,88 +0,0 @@ -MergeTree compact + horizontal merge -ReplacingMergeTree -100000 String -100000 UInt64 -50000 UInt64 -100000 String -SummingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -AggregatingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -MergeTree wide + horizontal merge -ReplacingMergeTree -100000 String -100000 UInt64 -50000 UInt64 -100000 String -SummingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -AggregatingMergeTree -100000 String -100000 UInt64 -200000 1 
-50000 String -100000 UInt64 -100000 1 -50000 2 -MergeTree compact + vertical merge -ReplacingMergeTree -100000 String -100000 UInt64 -50000 UInt64 -100000 String -SummingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -AggregatingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -MergeTree wide + vertical merge -ReplacingMergeTree -100000 String -100000 UInt64 -50000 UInt64 -100000 String -SummingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -AggregatingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh deleted file mode 100755 index 9cfd2294c8d..00000000000 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --optimize_aggregation_in_order 0 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" - - -function test() -{ - echo "ReplacingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=ReplacingMergeTree order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "drop table test" - - echo "SummingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, sum UInt64, d Dynamic) engine=SummingMergeTree(sum) order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" - $CH_CLIENT -q "drop table test" - - echo "AggregatingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, sum AggregateFunction(sum, UInt64), d Dynamic) engine=AggregatingMergeTree() order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" - $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" - - 
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" - $CH_CLIENT -q "drop table test" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" - -echo "MergeTree wide + horizontal merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" - -echo "MergeTree compact + vertical merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" - -echo "MergeTree wide + vertical merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference deleted file mode 100644 index af6c7d8d567..00000000000 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference +++ /dev/null @@ -1,44 +0,0 @@ -MergeTree compact + horizontal merge -CollapsingMergeTree -100000 String -100000 UInt64 -50000 String -50000 UInt64 -VersionedCollapsingMergeTree -100000 String -100000 UInt64 -75000 String -75000 UInt64 -MergeTree wide + horizontal merge -CollapsingMergeTree -100000 String -100000 UInt64 -50000 String -50000 UInt64 -VersionedCollapsingMergeTree -100000 String -100000 UInt64 -75000 String -75000 UInt64 -MergeTree compact + vertical merge -CollapsingMergeTree -100000 String -100000 UInt64 -50000 String -50000 UInt64 -VersionedCollapsingMergeTree -100000 String -100000 UInt64 -75000 String -75000 UInt64 -MergeTree wide + vertical merge -CollapsingMergeTree -100000 String -100000 UInt64 -50000 String -50000 UInt64 -VersionedCollapsingMergeTree -100000 String -100000 UInt64 -75000 String -75000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh deleted file mode 100755 index 02362012960..00000000000 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" - - -function test() -{ - echo "CollapsingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, sign Int8, d Dynamic) engine=CollapsingMergeTree(sign) order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "drop table test" - - echo "VersionedCollapsingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, sign Int8, version UInt8, d Dynamic) engine=VersionedCollapsingMergeTree(sign, version) order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "drop table test" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" - -echo "MergeTree wide + horizontal merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" - -echo "MergeTree compact + vertical merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" - -echo "MergeTree wide + vertical merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" From cb37b098ef23b0575b987edf35db2276bdb02a69 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 16:17:25 +0200 Subject: [PATCH 618/651] CI: add secrets to reusable stage wf yml --- .github/workflows/reusable_test_stage.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/reusable_test_stage.yml b/.github/workflows/reusable_test_stage.yml index d7bd55fab43..8926b43d372 100644 --- a/.github/workflows/reusable_test_stage.yml +++ b/.github/workflows/reusable_test_stage.yml @@ -10,6 +10,10 @@ name: StageWF description: ci data type: string required: true + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: s: @@ -23,3 +27,5 @@ jobs: test_name: ${{ matrix.job_name_and_runner_type.job_name }} runner_type: ${{ matrix.job_name_and_runner_type.runner_type }} data: ${{ inputs.data }} + secrets: + secret_envs: ${{ secrets.secret_envs }} From 4fba9a5c3c3e79bc4b0174410057206b266eb052 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 14:35:45 +0000 Subject: [PATCH 619/651] Cleanup. 
--- src/Analyzer/ArrayJoinNode.cpp | 16 ---- src/Analyzer/ColumnNode.cpp | 7 +- src/Analyzer/Passes/QueryAnalysisPass.cpp | 88 +------------------ src/Analyzer/QueryTreeBuilder.cpp | 4 +- src/Analyzer/createUniqueTableAliases.cpp | 31 ------- src/Parsers/ASTTablesInSelectQuery.cpp | 9 -- src/Parsers/ASTTablesInSelectQuery.h | 4 - src/Parsers/ParserTablesInSelectQuery.cpp | 4 - .../QueryPlan/DistributedCreateLocalPlan.cpp | 8 -- 9 files changed, 6 insertions(+), 165 deletions(-) diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 37c198f8472..27d7229d46a 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -55,8 +55,6 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const auto array_join_ast = std::make_shared(); array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner; - // array_join_ast->setAlias(getAlias()); - auto array_join_expressions_ast = std::make_shared(); const auto & array_join_expressions = getJoinExpressions().getNodes(); @@ -70,21 +68,7 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const else array_join_expression_ast = array_join_expression->toAST(options); - // QueryTreeNodePtr column_source; - // if (column_node) - // column_source = column_node->getColumnSourceOrNull(); - - // if (column_source && column_source->hasAlias()) - // { - // const auto & column_alias = column_node->getAlias(); - // const auto & name_or_alias = column_alias.empty() ? column_node->getColumnName() : column_alias; - - // if (!name_or_alias.starts_with("__")) - // array_join_expression_ast->setAlias(fmt::format("{}.{}", column_source->getAlias(), name_or_alias)); - // } - // else array_join_expression_ast->setAlias(array_join_expression->getAlias()); - array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast)); } diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index d12eac68ab4..2b514a85121 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -103,15 +103,10 @@ ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const if (column_source && options.fully_qualified_identifiers) { auto node_type = column_source->getNodeType(); - - // if (node_type == QueryTreeNodeType::ARRAY_JOIN && column_source->hasAlias()) - // return std::make_shared(std::string(fmt::format("{}.{}", column_source->getAlias(), column.name))); - if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::TABLE_FUNCTION || node_type == QueryTreeNodeType::QUERY || - node_type == QueryTreeNodeType::UNION)// || - //node_type == QueryTreeNodeType::ARRAY_JOIN) + node_type == QueryTreeNodeType::UNION) { if (column_source->hasAlias()) { diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 871c3842de0..a5992148b14 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -607,6 +607,8 @@ struct ScopeAliases std::unordered_set nodes_with_duplicated_aliases; std::vector cloned_nodes_with_duplicated_aliases; + /// Names which are aliases from ARRAY JOIN. + /// This is needed to properly qualify columns from matchers and avoid name collision. 
std::unordered_set array_join_aliases; std::unordered_map & getAliasMap(IdentifierLookupContext lookup_context) @@ -1070,25 +1072,10 @@ public: void visitImpl(QueryTreeNodePtr & node) { updateAliasesIfNeeded(node, false /*is_lambda_node*/); - - // if (auto * array_join_node = node->as()) - // { - // for (const auto & elem : array_join_node->getJoinExpressions()) - // { - // for (auto & child : elem->getChildren()) - // { - // // std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; - // visit(child); - // } - // } - // } } bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child) { - // if (parent->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) - // return false; - if (auto * lambda_node = child->as()) { updateAliasesIfNeeded(child, true /*is_lambda_node*/); @@ -1131,8 +1118,6 @@ private: if (node->getNodeType() == QueryTreeNodeType::WINDOW) return; - // std::cerr << ">>>>>>>>>> " << node->dumpTree() << std::endl; - const auto & alias = node->getAlias(); if (is_lambda_node) @@ -2926,7 +2911,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier IdentifierResolveSettings identifier_resolve_settings) { const auto & identifier_bind_part = identifier_lookup.identifier.front(); - // std::cerr << "tryResolveIdentifierFromAliases " << identifier_lookup.dump() << std::endl; auto * it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); if (it == nullptr) @@ -2955,7 +2939,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } auto node_type = alias_node->getNodeType(); - // std::cerr << "tryResolveIdentifierFromAliases 1.5 \n" << alias_node->dumpTree() << std::endl; /// Resolve expression if necessary if (node_type == QueryTreeNodeType::IDENTIFIER) @@ -2964,7 +2947,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier auto & alias_identifier_node = alias_node->as(); auto identifier = alias_identifier_node.getIdentifier(); - // std::cerr << "tryResolveIdentifierFromAliases 2 " << identifier.getFullName() << std::endl; auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); if (!lookup_result.resolved_identifier) { @@ -3141,7 +3123,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage( size_t identifier_column_qualifier_parts, bool can_be_not_found) { - // std::cerr << "tryResolveIdentifierFromStorage " << identifier.getFullName() << std::endl; auto identifier_without_column_qualifier = identifier; identifier_without_column_qualifier.popFirst(identifier_column_qualifier_parts); @@ -3284,7 +3265,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage( { auto qualified_identifier_with_removed_part = qualified_identifier; qualified_identifier_with_removed_part.popFirst(); - // std::cerr << "tryResolveIdentifierFromStorage qualified_identifier_with_removed_part" << qualified_identifier_with_removed_part.getFullName() << std::endl; if (qualified_identifier_with_removed_part.empty()) break; @@ -3818,8 +3798,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(con const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) { - // std::cerr << "tryResolveExpressionFromArrayJoinExpressions " << scope.dump() << std::endl; - const auto & array_join_node = table_expression_node->as(); const auto & array_join_column_expressions_list = array_join_node.getJoinExpressions(); const auto & 
array_join_column_expressions_nodes = array_join_column_expressions_list.getNodes(); @@ -3897,14 +3875,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) { - // std::cerr << "tryResolveIdentifierFromArrayJoin " << identifier_lookup.identifier.getFullName() << std::endl; - const auto & from_array_join_node = table_expression_node->as(); auto resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_array_join_node.getTableExpression(), scope); - // std::cerr << "tryResolveIdentifierFromArrayJoin 2 " << scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) - // << ' ' << identifier_lookup.dump() << ' ' << (resolved_identifier ? resolved_identifier->dumpTree() : "not resolved ") << std::endl; - if (scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) || !identifier_lookup.isExpressionLookup()) return resolved_identifier; @@ -3919,8 +3892,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi for (const auto & array_join_column_expression : array_join_column_expressions_nodes) { auto & array_join_column_expression_typed = array_join_column_expression->as(); - // std::cerr << "========== " << array_join_column_expression->dumpTree() << std::endl; - // std::cerr << "========== " << identifier_lookup.identifier.getFullName() << ' ' << from_array_join_node.getAlias() << ' ' << array_join_column_expression_typed.getAlias() << std::endl; IdentifierView identifier_view(identifier_lookup.identifier); @@ -3955,15 +3926,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi if (compound_expr) return compound_expr; - - // const auto & parts = identifier_lookup.identifier.getParts(); - // if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName() || - // (parts.size() == 2 && parts.front() == from_array_join_node.getAlias() && parts.back() == array_join_column_expression_typed.getAlias())) - // { - // auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), - // array_join_column_expression_typed.getColumnSource()); - // return array_join_column; - // } } if (!resolved_identifier) @@ -3980,8 +3942,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTreeNode(const Ident const QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope) { - // std::cerr << "tryResolveIdentifierFromJoinTreeNode " << identifier_lookup.identifier.getFullName() << std::endl; - auto join_tree_node_type = join_tree_node->getNodeType(); switch (join_tree_node_type) @@ -4185,8 +4145,6 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook IdentifierResolveScope & scope, IdentifierResolveSettings identifier_resolve_settings) { - // std::cerr << "tryResolveIdentifier " << identifier_lookup.identifier.getFullName() << std::endl; - auto it = scope.identifier_lookup_to_resolve_state.find(identifier_lookup); if (it != scope.identifier_lookup_to_resolve_state.end()) { @@ -6363,8 +6321,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { checkStackSize(); - // std::cerr << "resolveExpressionNode " << ignore_alias << "\n" << node->dumpTree() << std::endl; - auto resolved_expression_it = resolved_expressions.find(node); if (resolved_expression_it != resolved_expressions.end()) { @@ -6381,7 +6337,6 @@ ProjectionNames 
QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id evaluateScalarSubqueryIfNeeded(node, subquery_scope); } - // std::cerr << "resolveExpressionNode taken from cache \n" << node->dumpTree() << "\n PN " << (resolved_expression_it->second.empty() ? "" : resolved_expression_it->second.front()) << std::endl; return resolved_expression_it->second; } @@ -6392,10 +6347,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { auto projection_name_it = node_to_projection_name.find(node); if (projection_name_it != node_to_projection_name.end()) - { - // std::cerr << "resolveExpressionNode taken projection name from map : " << projection_name_it->second << " for \n" << node->dumpTree() << std::endl; result_projection_names.push_back(projection_name_it->second); - } } else { @@ -7651,36 +7603,25 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (auto & array_join_expression : array_join_nodes) { auto array_join_expression_alias = array_join_expression->getAlias(); - // if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias)) - // throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, - // "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", - // array_join_expression->formatASTForErrorMessage(), - // array_join_expression_alias, - // scope.scope_node->formatASTForErrorMessage()); - - /// Add array join expression into scope for (const auto & elem : array_join_nodes) { if (elem->hasAlias()) scope.aliases.array_join_aliases.insert(elem->getAlias()); + for (auto & child : elem->getChildren()) { - //std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; if (child) expressions_visitor.visit(child); - //visit(child); } } - // expressions_visitor.visit(array_join_expression); - std::string identifier_full_name; if (auto * identifier_node = array_join_expression->as()) identifier_full_name = identifier_node->getIdentifier().getFullName(); - resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, true); + resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, true /*ignore_alias*/); auto process_array_join_expression = [&](QueryTreeNodePtr & expression) { @@ -7747,27 +7688,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif } } - /** Allow to resolve ARRAY JOIN columns from aliases with types after ARRAY JOIN only after ARRAY JOIN expression list is resolved, because - * during resolution of ARRAY JOIN expression list we must use column type before ARRAY JOIN. - * - * Example: SELECT id, value_element FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value - * It is expected that `value_element AS value` expression inside ARRAY JOIN expression list will be - * resolved as `value_element` expression with type before ARRAY JOIN. - * And it is expected that `value_element` inside projection expression list will be resolved as `value_element` expression - * with type after ARRAY JOIN. 
- */ array_join_nodes = std::move(array_join_column_expressions); - // for (auto & array_join_column_expression : array_join_nodes) - // { - // auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); - // if (it != scope.aliases.alias_name_to_expression_node->end()) - // { - // auto & array_join_column_expression_typed = array_join_column_expression->as(); - // auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), - // array_join_column_expression_typed.getColumnSource()); - // it->second = std::move(array_join_column); - // } - // } } void QueryAnalyzer::checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope) @@ -8552,7 +8473,6 @@ QueryAnalysisPass::QueryAnalysisPass(bool only_analyze_) : only_analyze(only_ana void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - // std::cerr << ".... qap\n" << query_tree_node->dumpTree() << std::endl; QueryAnalyzer analyzer(only_analyze); analyzer.resolve(query_tree_node, table_expression, context); createUniqueTableAliases(query_tree_node, table_expression, context); diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 02d742f5e49..6a5db4bc1de 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -957,7 +957,6 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list, context); auto array_join_node = std::make_shared(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join); - // array_join_node->setAlias(array_join_expression.tryGetAlias()); /** Original AST is not set because it will contain only array join part and does * not include left table expression. @@ -1046,8 +1045,7 @@ ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context) { QueryTreeBuilder builder(std::move(query), context); - auto qt = builder.getQueryTreeNode(); - return qt; + return builder.getQueryTreeNode(); } } diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp index 30b8c0a433b..b36ba1cafaa 100644 --- a/src/Analyzer/createUniqueTableAliases.cpp +++ b/src/Analyzer/createUniqueTableAliases.cpp @@ -61,37 +61,6 @@ public: node->setAlias(alias); } - if (auto * array_join = node->as()) - { - //size_t counter = 0; - for (auto & column : array_join->getJoinExpressions()) - { - if (auto * column_node = column->as()) - { - if (!column_node->hasAlias()) - column_node->setAlias(column_node->getColumnName()); - } - } - } - - // if (auto * array_join = node->as()) - // { - // for (auto & column : array_join->getJoinExpressions()) - // { - // if (auto * column_node = column->as()) - // { - // const auto & column_alias = column_node->getAlias(); - // const auto & name_or_alias = column_alias.empty() ? 
column_node->getColumnName() : column_alias; - - // if (!name_or_alias.starts_with("__")) - // { - - // column_node->setAlias(fmt::format("{}.{}", alias, name_or_alias)); - // } - // } - // } - // } - break; } default: diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index b4058a0950d..e782bad797e 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -247,12 +247,6 @@ void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & sta formatImplAfterTable(settings, state, frame); } -// static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) -// { -// settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); -// settings.writeIdentifier(name); -// settings.ostr << (settings.hilite ? IAST::hilite_none : ""); -// } void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { @@ -264,9 +258,6 @@ void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & sta << indent_str << (kind == Kind::Left ? "LEFT " : "") << "ARRAY JOIN" << (settings.hilite ? hilite_none : ""); - // if (!alias.empty()) - // writeAlias(alias, settings); - settings.one_line ? expression_list->formatImpl(settings, state, frame) : expression_list->as().formatImplMultiline(settings, state, frame); diff --git a/src/Parsers/ASTTablesInSelectQuery.h b/src/Parsers/ASTTablesInSelectQuery.h index 212436b0d9e..f3f329ca2b6 100644 --- a/src/Parsers/ASTTablesInSelectQuery.h +++ b/src/Parsers/ASTTablesInSelectQuery.h @@ -95,10 +95,6 @@ struct ASTArrayJoin : public IAST /// List of array or nested names to JOIN, possible with aliases. ASTPtr expression_list; - // String alias; - - // String tryGetAlias() const override { return alias; } - // void setAlias(const String & to) override { alias = to; } using IAST::IAST; String getID(char) const override { return "ArrayJoin"; } diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index c96b6c1584d..b4d48ae67e9 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -98,10 +98,6 @@ bool ParserArrayJoin::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!has_array_join) return false; - // ASTPtr alias_node; - // if (ParserAlias(false).parse(pos, alias_node, expected)) - // tryGetIdentifierNameInto(alias_node, res->alias); - if (!ParserExpressionList(false).parse(pos, res->expression_list, expected)) return false; diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index e4d908e2af0..d4545482477 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -2,7 +2,6 @@ #include #include -#include "Parsers/queryToString.h" #include #include #include @@ -69,19 +68,12 @@ std::unique_ptr createLocalPlan( if (context->getSettingsRef().allow_experimental_analyzer) { - // std::cerr << query_ast->dumpTree() << std::endl; - // std::cerr << queryToString(query_ast) << std::endl; - /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). 
new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); - // std::cerr << interpreter.getQueryTree()->dumpTree() << std::endl; query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); - // WriteBufferFromOwnString buf; - // query_plan->explainPlan(buf, {.header=true, .actions=true}); - // std::cerr << buf.str() << std::endl; } else { From dff7a2f1f6bab1a49669a06f95990d34e71c2cf6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 14:37:33 +0000 Subject: [PATCH 620/651] Cleanup. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 -- src/Analyzer/createUniqueTableAliases.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index a5992148b14..3fca66e6eb8 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3995,8 +3995,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTree(const Identifie if (identifier_lookup.isFunctionLookup()) return {}; - // std::cerr << "tryResolveIdentifierFromJoinTree " << identifier_lookup.identifier.getFullName() << std::endl; - /// Try to resolve identifier from table columns if (auto resolved_identifier = tryResolveIdentifierFromTableColumns(identifier_lookup, scope)) return resolved_identifier; diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp index b36ba1cafaa..8f850fe8dec 100644 --- a/src/Analyzer/createUniqueTableAliases.cpp +++ b/src/Analyzer/createUniqueTableAliases.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include #include #include #include @@ -60,7 +58,6 @@ public: alias = fmt::format("__table{}", ++next_id); node->setAlias(alias); } - break; } default: From b254be618087e8f949f420406e791b24d11c960a Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 16:57:08 +0200 Subject: [PATCH 621/651] CI: add secrets to reusable build stage wf yml --- .github/workflows/reusable_build_stage.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/reusable_build_stage.yml b/.github/workflows/reusable_build_stage.yml index 4463645880b..a8e84819c95 100644 --- a/.github/workflows/reusable_build_stage.yml +++ b/.github/workflows/reusable_build_stage.yml @@ -13,6 +13,10 @@ name: BuildStageWF description: ci data type: string required: true + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: s: @@ -30,3 +34,5 @@ jobs: # for now let's do I deep checkout for builds checkout_depth: 0 data: ${{ inputs.data }} + secrets: + secret_envs: ${{ secrets.secret_envs }} From 4982d7c85cc7a71ddef773cd57df540e7b8cd33a Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 16:59:47 +0200 Subject: [PATCH 622/651] Fix for mark release ready --- .github/workflows/master.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7c55098bdfd..c2a893a8e99 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -136,7 +136,7 @@ jobs: MarkReleaseReady: if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, Builds_1] + needs: [RunConfig, Builds_1, Builds_2] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Debug From b3f836fbb1b451c08d57f4956c0a9c5137fe5ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May
2024 17:08:30 +0200 Subject: [PATCH 623/651] Run 03147_system_columns_access_checks only on release --- tests/queries/0_stateless/03147_system_columns_access_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.sh b/tests/queries/0_stateless/03147_system_columns_access_checks.sh index 2bd7fb083ea..b027ea28504 100755 --- a/tests/queries/0_stateless/03147_system_columns_access_checks.sh +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-ordinary-database, long +# Tags: no-fasttest, no-parallel, no-ordinary-database, long, no-debug, no-asan, no-tsan, no-msan CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From d5b763d03d581b70b1243ab589223d85d231fe89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:21:50 +0200 Subject: [PATCH 624/651] Limit max time for 01442_merge_detach_attach_long --- .../01442_merge_detach_attach_long.sh | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index acb2550d48c..e7c20158b5d 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-debug +# Tags: long, no-parallel set -e @@ -11,14 +11,24 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE t (x Int8) ENGINE = MergeTree ORDER BY tuple()" -for _ in {1..100}; do - ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" - ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" - ${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE t FINAL" 2>/dev/null & - ${CLICKHOUSE_CLIENT} --query="ALTER TABLE t DETACH PARTITION tuple()" - ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM t HAVING count() > 0" -done +function thread_ops() +{ + local TIMELIMIT=$((SECONDS+$1)) + local it=0 + while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 100 ]; + do + it=$((it+1)) + ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" + ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" + ${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE t FINAL" 2>/dev/null & + ${CLICKHOUSE_CLIENT} --query="ALTER TABLE t DETACH PARTITION tuple()" + ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM t HAVING count() > 0" + done +} +export -f thread_ops +TIMEOUT=60 +thread_ops $TIMEOUT & wait $CLICKHOUSE_CLIENT -q "DROP TABLE t" From bd415cc83192a734dccb00bd004775e46bd74b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:27:47 +0200 Subject: [PATCH 625/651] Reduce 02228_merge_tree_insert_memory_usage partitions --- .../02228_merge_tree_insert_memory_usage.sql | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index 8924627a717..26a201ec89f 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,16 +1,16 @@ -- Tags: long, no-parallel -SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require 
too many retries due to 1024 partitions, so disable fault injections +SET insert_keeper_fault_injection_probability=0; -- this test can require too many retries to succeed due to 100 partitions, so disable fault injections -- regression for MEMORY_LIMIT_EXCEEDED error because of deferred final part flush drop table if exists data_02228; -create table data_02228 (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by key1 % 1024; -insert into data_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; -insert into data_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=10000000; -- { serverError MEMORY_LIMIT_EXCEEDED } +create table data_02228 (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by key1 % 100; +insert into data_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; +insert into data_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=1000000; -- { serverError MEMORY_LIMIT_EXCEEDED } drop table data_02228; drop table if exists data_rep_02228 SYNC; -create table data_rep_02228 (key1 UInt32, sign Int8, s UInt64) engine = ReplicatedCollapsingMergeTree('/clickhouse/{database}', 'r1', sign) order by (key1) partition by key1 % 1024; -insert into data_rep_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; -insert into data_rep_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=10000000; -- { serverError MEMORY_LIMIT_EXCEEDED } +create table data_rep_02228 (key1 UInt32, sign Int8, s UInt64) engine = ReplicatedCollapsingMergeTree('/clickhouse/{database}', 'r1', sign) order by (key1) partition by key1 % 100; +insert into data_rep_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; +insert into data_rep_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=1000000; -- { serverError MEMORY_LIMIT_EXCEEDED } drop table data_rep_02228 SYNC; From b396e63ea5721f72e0a1efb15e1c108c93dfad2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:30:26 +0200 Subject: [PATCH 626/651] Reduce sizes in 02735_parquet_encoder --- tests/queries/0_stateless/02735_parquet_encoder.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql index fe45a2a317d..9320d0e57c3 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.sql +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -41,7 +41,7 @@ create temporary table basic_types_02735 as select * from generateRandom(' decimal128 Decimal128(20),
decimal256 Decimal256(40), ipv4 IPv4, - ipv6 IPv6') limit 10101; + ipv6 IPv6') limit 1011; insert into function file(basic_types_02735.parquet) select * from basic_types_02735; desc file(basic_types_02735.parquet); select (select sum(cityHash64(*)) from basic_types_02735) - (select sum(cityHash64(*)) from file(basic_types_02735.parquet)); @@ -59,7 +59,7 @@ create temporary table nullables_02735 as select * from generateRandom(' fstr Nullable(FixedString(12)), i256 Nullable(Int256), decimal256 Nullable(Decimal256(40)), - ipv6 Nullable(IPv6)') limit 10000; + ipv6 Nullable(IPv6)') limit 1000; insert into function file(nullables_02735.parquet) select * from nullables_02735; select (select sum(cityHash64(*)) from nullables_02735) - (select sum(cityHash64(*)) from file(nullables_02735.parquet)); drop table nullables_02735; @@ -83,7 +83,7 @@ create table arrays_02735 engine = Memory as select * from generateRandom(' decimal64 Array(Decimal64(10)), ipv4 Array(IPv4), msi Map(String, Int16), - tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 10000; + tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 1000; insert into function file(arrays_02735.parquet) select * from arrays_02735; create temporary table arrays_out_02735 as arrays_02735; insert into arrays_out_02735 select * from file(arrays_02735.parquet); @@ -107,7 +107,7 @@ create temporary table madness_02735 as select * from generateRandom(' mln Map(LowCardinality(String), Nullable(Int8)), t Tuple(Map(FixedString(5), Tuple(Array(UInt16), Nullable(UInt16), Array(Tuple(Int8, Decimal64(10))))), Tuple(kitchen UInt64, sink String)), n Nested(hello UInt64, world Tuple(first String, second FixedString(1))) - ') limit 10000; + ') limit 1000; insert into function file(madness_02735.parquet) select * from madness_02735; insert into function file(a.csv) select * from madness_02735 order by tuple(*); insert into function file(b.csv) select aa, aaa, an, aan, l, ln, arrayMap(x->reinterpret(x, 'UInt128'), al) as al_, aaln, mln, t, n.hello, n.world from file(madness_02735.parquet) order by tuple(aa, aaa, an, aan, l, ln, al_, aaln, mln, t, n.hello, n.world); From 24797a093a216479d70b2b0e065d9f3850d484bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:31:39 +0200 Subject: [PATCH 627/651] Remove 02344_insert_profile_events_stress from sanitizer run as it's too slow --- .../queries/0_stateless/02344_insert_profile_events_stress.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql index f9fdd3b943f..e9a790bea5d 100644 --- a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql +++ b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, long, no-debug, no-tsan +-- Tags: no-parallel, long, no-debug, no-tsan, no-msan, no-asan create table data_02344 (key Int) engine=Null; -- 3e9 rows is enough to fill the socket buffer and cause INSERT hung. From 049ca7c71e5c3543e4a63d22f075de2ff96373c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:34:48 +0200 Subject: [PATCH 628/651] Reduce 01396_inactive_replica_cleanup_nodes_zookeeper! 
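
The test drives a large number of INSERTs at replica r1 while a second replica stays inactive, then checks that the number of nodes in ZooKeeper does not grow unbounded. 1000 iterations are still plenty for the cleanup thread to trim the replication log behind the inactive replica, and the test no longer risks hitting the overall time limit. A minimal sketch of the kind of loop that SCALE bounds (a simplified illustration assuming row-by-row inserts; the actual script batches work through $CLICKHOUSE_CLIENT and inspects ZooKeeper node counts afterwards):

    SCALE=1000
    for i in $(seq 1 "$SCALE"); do
        # Each insert adds replication log entries in ZooKeeper that the
        # inactive replica cannot consume; cleanup must eventually trim them.
        $CLICKHOUSE_CLIENT --query "INSERT INTO r1 VALUES ($i)"
    done
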
--- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 67a2a70b509..11102b128b2 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: replica, no-debug, no-parallel +# Tags: replica, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -11,7 +11,7 @@ REPLICA=$($CLICKHOUSE_CLIENT --query "Select getMacro('replica')") # Check that if we have one inactive replica and a huge number of INSERTs to active replicas, # the number of nodes in ZooKeeper does not grow unbounded. -SCALE=5000 +SCALE=1000 $CLICKHOUSE_CLIENT -n --query " DROP TABLE IF EXISTS r1; From 7f9734d0cc9dc270ea129b75881234ace3cdf1fa Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 15:38:21 +0000 Subject: [PATCH 629/651] Fix Logical error: Bad cast for Buffer table with prewhere. --- src/Storages/StorageBuffer.cpp | 2 ++ .../0_stateless/00910_buffer_prewhere_different_types.sql | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d9a0b2b4d59..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -302,6 +302,8 @@ void StorageBuffer::read( auto src_table_query_info = query_info; if (src_table_query_info.prewhere_info) { + src_table_query_info.prewhere_info = src_table_query_info.prewhere_info->clone(); + auto actions_dag = ActionsDAG::makeConvertingActions( header_after_adding_defaults.getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), diff --git a/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql b/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql index 8f305914cb8..702d9bb3e6c 100644 --- a/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql +++ b/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql @@ -2,8 +2,14 @@ DROP TABLE IF EXISTS buffer_table1__fuzz_28; DROP TABLE IF EXISTS merge_tree_table1; CREATE TABLE merge_tree_table1 (`x` UInt32) ENGINE = MergeTree ORDER BY x; + +CREATE TABLE buffer_table1__fuzz_24 (`s` Nullable(Int128), `x` Nullable(FixedString(17))) ENGINE = Buffer(currentDatabase(), 'merge_tree_table1', 16, 10, 60, 10, 1000, 1048576, 2097152); +SELECT s FROM buffer_table1__fuzz_24 PREWHERE factorial(toNullable(10)); -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } + INSERT INTO merge_tree_table1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +SELECT s FROM buffer_table1__fuzz_24 PREWHERE factorial(toNullable(10)); -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } + SET send_logs_level='error'; CREATE TABLE buffer_table1__fuzz_28 (`x` Nullable(UInt32)) ENGINE = Buffer(currentDatabase(), 'merge_tree_table1', 16, 10, 60, 10, 1000, 1048576, 2097152); From 2669df7296a1b362807693d0cc41833ecf80a148 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 17:30:36 +0200 Subject: [PATCH 630/651] add secrets to reusable build yml --- .github/workflows/reusable_build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index 80d78d93e1b..5e254d785ec 100644 --- 
a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -33,6 +33,10 @@ name: Build ClickHouse additional_envs: description: additional ENV variables to setup the job type: string + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: Build: @@ -54,6 +58,7 @@ jobs: run: | cat >> "$GITHUB_ENV" << 'EOF' ${{inputs.additional_envs}} + ${{secrets.secret_envs}} DOCKER_TAG< Date: Fri, 24 May 2024 17:44:14 +0200 Subject: [PATCH 631/651] Restore tags --- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 2 +- tests/queries/0_stateless/01442_merge_detach_attach_long.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 11102b128b2..1c1eb4489ee 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: replica, no-parallel +# Tags: replica, no-debug, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index e7c20158b5d..85fdf7ed764 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long, no-parallel, no-debug set -e From 772d38a0c139ca5ee76bd7886d70db874db503c0 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 24 May 2024 18:11:21 +0200 Subject: [PATCH 632/651] Update s3queue.md --- docs/en/engines/table-engines/integrations/s3queue.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 8ebab80423f..aa7fa512480 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -202,8 +202,7 @@ Example: CREATE TABLE s3queue_engine_table (name String, value UInt32) ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip') SETTINGS - mode = 'unordered', - keeper_path = '/clickhouse/s3queue/'; + mode = 'unordered'; CREATE TABLE stats (name String, value UInt32) ENGINE = MergeTree() ORDER BY name; From e59097274a72216e99dbec83cbbe4f5142463799 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 24 May 2024 13:56:16 -0300 Subject: [PATCH 633/651] test for #64211 --- ...uted_merge_global_in_primary_key.reference | 19 +++++ ...istributed_merge_global_in_primary_key.sql | 83 +++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference create mode 100644 tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference new file mode 100644 index 00000000000..f572a3570f4 --- /dev/null +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference @@ -0,0 +1,19 
@@ +------------------- Distributed ------------------ +1 +---------- merge() over distributed -------------- +2 +---------- merge() over local -------------------- +1 +1 +1 +---------- remote() over Merge ------------------- +2 +---------- Distributed over Merge ---------------- +1 +---------- remote() over Merge ------------------- +2 +---------- Merge over Distributed ----------------- +1 +1 +1 +2 diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql new file mode 100644 index 00000000000..78176e346f4 --- /dev/null +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -0,0 +1,83 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/64211 + +create database test; +use test; + +CREATE TABLE test_local (name String) +ENGINE = MergeTree +ORDER BY name as select 'x'; + +CREATE TABLE test_distributed as test_local +ENGINE = Distributed(default, currentDatabase(), test_local); + +CREATE TABLE test_merge as test_local +ENGINE = Merge(currentDatabase(), 'test_local'); + +CREATE TABLE test_merge_distributed as test_local +ENGINE = Distributed(default, currentDatabase(), test_merge); + +CREATE TABLE test_distributed_merge as test_local +ENGINE = Merge(currentDatabase(), 'test_distributed'); + +SELECT '------------------- Distributed ------------------'; +SELECT count() +FROM test_distributed +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- merge() over distributed --------------'; +SELECT count() +FROM merge(currentDatabase(), 'test_distributed') +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- merge() over local --------------------'; +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM merge(currentDatabase(), 'test_local')); + +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT '---------- remote() over Merge -------------------'; +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge) +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- Distributed over Merge ----------------'; +SELECT count() +FROM test_merge_distributed +WHERE name GLOBAL IN (SELECT name FROM test_merge_distributed); + +SELECT '---------- remote() over Merge -------------------'; +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge) +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT '---------- Merge over Distributed -----------------'; +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_distributed_merge)); + +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM test_distributed_merge); + +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_distributed_merge) +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + + +DROP TABLE test_merge; +DROP TABLE test_merge_distributed; +DROP TABLE 
test_distributed_merge; +DROP TABLE test_distributed; +DROP TABLE test_local; +drop database test; From 9a917db4b3eade94941225b4a792f4d2331459ba Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 24 May 2024 14:27:26 -0300 Subject: [PATCH 634/651] Update 01227_distributed_merge_global_in_primary_key.sql --- .../01227_distributed_merge_global_in_primary_key.sql | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql index 78176e346f4..e73d07c193f 100644 --- a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -1,8 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/64211 -create database test; -use test; - CREATE TABLE test_local (name String) ENGINE = MergeTree ORDER BY name as select 'x'; @@ -80,4 +77,3 @@ DROP TABLE test_merge_distributed; DROP TABLE test_distributed_merge; DROP TABLE test_distributed; DROP TABLE test_local; -drop database test; From 91a84f8e17192a70b48d3152ad8b48107d60c117 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 24 May 2024 15:03:45 -0300 Subject: [PATCH 635/651] Update 01227_distributed_merge_global_in_primary_key.sql --- .../01227_distributed_merge_global_in_primary_key.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql index e73d07c193f..5cd4aaab1e6 100644 --- a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -5,13 +5,13 @@ ENGINE = MergeTree ORDER BY name as select 'x'; CREATE TABLE test_distributed as test_local -ENGINE = Distributed(default, currentDatabase(), test_local); +ENGINE = Distributed(test_shard_localhost, currentDatabase(), test_local); CREATE TABLE test_merge as test_local ENGINE = Merge(currentDatabase(), 'test_local'); CREATE TABLE test_merge_distributed as test_local -ENGINE = Distributed(default, currentDatabase(), test_merge); +ENGINE = Distributed(test_shard_localhost, currentDatabase(), test_merge); CREATE TABLE test_distributed_merge as test_local ENGINE = Merge(currentDatabase(), 'test_distributed'); From 3ed1ec2f63582819f005d591459f30cdbff0daff Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 24 May 2024 23:54:56 -0300 Subject: [PATCH 636/651] Update tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql Co-authored-by: Nikita Mikhaylov --- .../01227_distributed_merge_global_in_primary_key.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql index 5cd4aaab1e6..6b0dd4c8747 100644 --- a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -1,5 +1,9 @@ -- https://github.com/ClickHouse/ClickHouse/issues/64211 - +DROP TABLE IF EXISTS test_merge; +DROP TABLE IF EXISTS test_merge_distributed; +DROP TABLE IF EXISTS test_distributed_merge; +DROP TABLE IF EXISTS test_distributed; +DROP TABLE IF EXISTS test_local; CREATE TABLE test_local (name String) ENGINE = MergeTree ORDER BY name 
as select 'x'; From 031591f3dd5ae155e3a8d8cf061e2956a29e6a4a Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 25 May 2024 15:48:45 +0200 Subject: [PATCH 637/651] Fix settings changes history --- src/Core/SettingsChangesHistory.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index a89516436e8..16f28d94640 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,14 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history = { + {"24.6", {{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, + {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, + {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, + {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, + {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, + {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, @@ -93,13 +101,6 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, - {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, - {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, - {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, - {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, - {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, From 142d67d1b298478a0df46b2585d4719a9ef55f4e Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 26 May 2024 11:16:48 +0200 Subject: [PATCH 638/651] Fix S3ObjectStorage::applyNewSettings --- .../ObjectStorages/S3/S3ObjectStorage.cpp | 21 ++++++++----------- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 5 +---- .../ObjectStorage/S3/Configuration.cpp | 2 +- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index c07313b52db..69485bd4d01 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -575,24 +575,21 @@ void S3ObjectStorage::applyNewSettings( ContextPtr context, const ApplyNewSettingsOptions & options) { - auto new_s3_settings = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); - if (!static_headers.empty()) - { - new_s3_settings->auth_settings.headers.insert( - new_s3_settings->auth_settings.headers.end(), - static_headers.begin(), static_headers.end()); - } + auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); + auto modified_settings = std::make_unique<S3ObjectStorageSettings>(*s3_settings.get()); + modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings); if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName())) - new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); - auto current_s3_settings = s3_settings.get(); - if (options.allow_client_change && (current_s3_settings->auth_settings.hasUpdates(new_s3_settings->auth_settings) || for_disk_s3)) + auto current_settings = s3_settings.get(); + if (options.allow_client_change + && (current_settings->auth_settings.hasUpdates(modified_settings->auth_settings) || for_disk_s3)) { - auto new_client = getClient(config, config_prefix, context, *new_s3_settings, for_disk_s3, &uri); + auto new_client = getClient(config, config_prefix, context, *modified_settings, for_disk_s3, &uri); client.set(std::move(new_client)); } - s3_settings.set(std::move(new_s3_settings)); + s3_settings.set(std::move(modified_settings)); } std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage( diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 1fff6d67e23..062ddd4e2a2 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -54,8 +54,7 @@ private: const S3Capabilities & s3_capabilities_, ObjectStorageKeysGeneratorPtr key_generator_, const String & disk_name_, - bool for_disk_s3_ = true, - const HTTPHeaderEntries & static_headers_ = {}) + bool for_disk_s3_ = true) : uri(uri_) , disk_name(disk_name_) , client(std::move(client_)) @@ -64,7 +63,6 @@ private: , key_generator(std::move(key_generator_)) , log(getLogger(logger_name)) , for_disk_s3(for_disk_s3_) - , static_headers(static_headers_) { } @@ -189,7 +187,6 @@ private: LoggerPtr log; const bool for_disk_s3; - const HTTPHeaderEntries static_headers; }; } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 6b6cde0c431..4b217b94730 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -136,7 +136,7 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, return std::make_shared<S3ObjectStorage>( std::move(client), std::move(s3_settings), url, s3_capabilities, - key_generator, "StorageS3", false, headers_from_ast); + key_generator, "StorageS3", false); } void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection) From 14f259d9d7a9d53ed8d1c64be36be20a622bf7ce Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 26 May 2024 13:54:35 +0000 Subject: [PATCH 639/651] Fix flaky test --- tests/queries/0_stateless/03130_generateSnowflakeId.reference | 2 -- tests/queries/0_stateless/03130_generateSnowflakeId.sql | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index 6ec0cafab16..f5b7872f81e 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -1,11 +1,9 @@ -- generateSnowflakeID 1 -1 0 0 1 100 -- generateSnowflakeIDThreadMonotonic 1 -1 100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index 903be5b786c..57cdd21a9fe 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -1,6 +1,5 @@ SELECT '-- generateSnowflakeID'; -SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; -- check machine sequence number is zero SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs @@
-18,7 +17,6 @@ FROM SELECT '-- generateSnowflakeIDThreadMonotonic'; -SELECT bitShiftLeft(toUInt64(generateSnowflakeIDThreadMonotonic()), 52) = 0; -- check machine sequence number is zero SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeIDThreadMonotonic()), 63), 1) = 0; -- check first bit is zero SELECT generateSnowflakeIDThreadMonotonic(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } From 8f4422d72917c1885a892200e267268f6b2e3b98 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 26 May 2024 14:07:50 +0000 Subject: [PATCH 640/651] Test analyzer and non-analyzer execution --- .../02494_query_cache_nested_query_bug.reference | 2 ++ .../02494_query_cache_nested_query_bug.sh | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference index b261da18d51..9ec033cefb1 100644 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference @@ -1,2 +1,4 @@ +2 +0 1 0 diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh index a5339a098dc..6bc3d03ac66 100755 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh @@ -15,11 +15,17 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE tab (a UInt64) ENGINE=MergeTree() ORD ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (1) (2) (3)" ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (3) (4) (5)" -SETTINGS="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS_NO_ANALYZER="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS_ANALYZER="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" # Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated # with EXPLAIN PLAN, we need to check the logs. -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS" 2>&1 | grep "Aggregated. " | wc -l -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l + +${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE" + +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. 
" | wc -l ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE" From 3ee2307024c9a7b2c54247335f0fb0f0f54380e4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 27 May 2024 10:04:19 +0200 Subject: [PATCH 641/651] Revert "Refactoring of Server.h: Isolate server management from other logic" --- programs/server/Server.cpp | 987 +++++++++++++++++- programs/server/Server.h | 95 +- src/CMakeLists.txt | 1 - src/Server/ServersManager/IServersManager.cpp | 268 ----- src/Server/ServersManager/IServersManager.h | 74 -- .../ServersManager/InterServersManager.cpp | 327 ------ .../ServersManager/InterServersManager.h | 44 - .../ServersManager/ProtocolServersManager.cpp | 523 ---------- .../ServersManager/ProtocolServersManager.h | 37 - 9 files changed, 1032 insertions(+), 1324 deletions(-) delete mode 100644 src/Server/ServersManager/IServersManager.cpp delete mode 100644 src/Server/ServersManager/IServersManager.h delete mode 100644 src/Server/ServersManager/InterServersManager.cpp delete mode 100644 src/Server/ServersManager/InterServersManager.h delete mode 100644 src/Server/ServersManager/ProtocolServersManager.cpp delete mode 100644 src/Server/ServersManager/ProtocolServersManager.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b62ae40924c..223bc1f77e7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -42,9 +44,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -79,19 +83,29 @@ #include #include #include +#include #include "MetricsTransmitter.h" #include +#include +#include #include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include #include #include #include +#include #include "config.h" #include @@ -105,9 +119,19 @@ #endif #if USE_SSL +# include # include #endif +#if USE_GRPC +# include +#endif + +#if USE_NURAFT +# include +# include +#endif + #if USE_JEMALLOC # include #endif @@ -135,6 +159,18 @@ namespace ProfileEvents { extern const Event MainConfigLoads; extern const Event ServerStartupMilliseconds; + extern const Event InterfaceNativeSendBytes; + extern const Event InterfaceNativeReceiveBytes; + extern const Event InterfaceHTTPSendBytes; + extern const Event InterfaceHTTPReceiveBytes; + extern const Event InterfacePrometheusSendBytes; + extern const Event InterfacePrometheusReceiveBytes; + extern const Event InterfaceInterserverSendBytes; + extern const Event InterfaceInterserverReceiveBytes; + extern const Event InterfaceMySQLSendBytes; + extern const Event InterfaceMySQLReceiveBytes; + extern const Event InterfacePostgreSQLSendBytes; + extern const Event InterfacePostgreSQLReceiveBytes; } namespace fs = std::filesystem; @@ -202,9 +238,11 @@ namespace DB namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; + extern const int SUPPORT_IS_DISABLED; extern const int ARGUMENT_OUT_OF_BOUND; extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; + extern const int NETWORK_ERROR; extern const int CORRUPTED_DATA; } @@ -219,6 +257,115 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } +Poco::Net::SocketAddress Server::socketBindListen( + const Poco::Util::AbstractConfiguration & config, + Poco::Net::ServerSocket & socket, + const std::string & host, + UInt16 port, + [[maybe_unused]] bool secure) const +{ + auto address 
= makeSocketAddress(host, port, &logger()); + socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); + /// If caller requests any available port from the OS, discover it after binding. + if (port == 0) + { + address = socket.address(); + LOG_DEBUG(&logger(), "Requested any available port (port == 0), actual port is {:d}", address.port()); + } + + socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); + + return address; +} + +Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) +{ + auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); + if (listen_hosts.empty()) + { + listen_hosts.emplace_back("::1"); + listen_hosts.emplace_back("127.0.0.1"); + } + return listen_hosts; +} + +Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) +{ + auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host"); + if (!interserver_listen_hosts.empty()) + return interserver_listen_hosts; + + /// Use more general restriction in case of emptiness + return getListenHosts(config); +} + +bool getListenTry(const Poco::Util::AbstractConfiguration & config) +{ + bool listen_try = config.getBool("listen_try", false); + if (!listen_try) + { + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + listen_try = + DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() && + std::none_of(protocols.begin(), protocols.end(), [&](const auto & protocol) + { + return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); + }); + } + return listen_try; +} + + +void Server::createServer( + Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + bool listen_try, + bool start_server, + std::vector<ProtocolServerAdapter> & servers, + CreateServerFunc && func) const +{ + /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. + if (config.getString(port_name, "").empty()) + return; + + /// If we already have an active server for this listen_host/port_name, don't create it again + for (const auto & server : servers) + { + if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) + return; + } + + auto port = config.getInt(port_name); + try + { + servers.push_back(func(port)); + if (start_server) + { + servers.back().start(); + LOG_INFO(&logger(), "Listening for {}", servers.back().getDescription()); + } + global_context->registerServerPort(port_name, port); + } + catch (const Poco::Exception &) + { + if (listen_try) + { + LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " "then consider to " "specify not disabled IPv4 or IPv6 address to listen in <listen_host> element of configuration " "file. Example for disabled IPv6: <listen_host>0.0.0.0</listen_host> ." " Example for disabled IPv4: <listen_host>::</listen_host>", + listen_host, port, getCurrentExceptionMessage(false)); + } + else + { + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); + } + } +} + #if defined(OS_LINUX) namespace @@ -518,7 +665,6 @@ try ServerSettings server_settings; server_settings.loadSettingsFromConfig(config()); - Poco::ThreadPool server_pool(3, server_settings.max_connections); ASTAlterCommand::setFormatAlterCommandsWithParentheses(server_settings.format_alter_operations_with_parentheses); @@ -575,6 +721,11 @@ try CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); + Poco::ThreadPool server_pool(3, server_settings.max_connections); + std::mutex servers_lock; + std::vector<ProtocolServerAdapter> servers; + std::vector<ProtocolServerAdapter> servers_to_start_before_tables; + /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... */ @@ -624,10 +775,6 @@ try bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log"); - std::mutex servers_lock; - ProtocolServersManager servers(context(), &logger()); - InterServersManager servers_to_start_before_tables(context(), &logger()); - // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. @@ -659,7 +806,32 @@ try LOG_DEBUG(log, "Shut down storages."); - servers_to_start_before_tables.stopServers(server_settings, servers_lock); + if (!servers_to_start_before_tables.empty()) + { + LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers_to_start_before_tables) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(log, "Closed all listening sockets."); + + if (current_connections > 0) + current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); + else + LOG_INFO(log, "Closed connections to servers for tables."); + } global_context->shutdownKeeperDispatcher(); @@ -756,13 +928,19 @@ try server_settings.asynchronous_heavy_metrics_update_period_s, [&]() -> std::vector<ProtocolServerMetrics> { + std::vector<ProtocolServerMetrics> metrics; + std::lock_guard lock(servers_lock); - std::vector<ProtocolServerMetrics> metrics1 = servers_to_start_before_tables.getMetrics(); - std::vector<ProtocolServerMetrics> metrics2 = servers.getMetrics(); - metrics1.reserve(metrics1.size() + metrics2.size()); - metrics1.insert(metrics1.end(), std::make_move_iterator(metrics2.begin()), std::make_move_iterator(metrics2.end())); - return metrics1; - }); + metrics.reserve(servers_to_start_before_tables.size() + servers.size()); + + for (const auto & server : servers_to_start_before_tables) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + + for (const auto & server : servers) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + return metrics; + } + ); zkutil::validateZooKeeperConfig(config()); bool has_zookeeper = zkutil::hasZooKeeperConfig(config()); @@ -1410,8 +1588,7 @@ try if (global_context->isServerCompletelyStarted()) { std::lock_guard lock(servers_lock); - servers.updateServers(*config, *this, servers_lock, server_pool, async_metrics, latest_config); - servers_to_start_before_tables.updateServers(*config, *this, servers_lock, server_pool, async_metrics, latest_config); + updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); } } @@ -1458,17 +1635,141 @@ try /// Must be the last. latest_config = config; }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + /* already_loaded = */ false); /// Reload it right now (initial loading) - servers_to_start_before_tables.createServers( - config(), - *this, - servers_lock, - server_pool, - async_metrics, - /* start_servers= */ false, - ServerType(ServerType::Type::QUERIES_ALL) - ); + const auto listen_hosts = getListenHosts(config()); + const auto interserver_listen_hosts = getInterserverListenHosts(config()); + const auto listen_try = getListenTry(config()); + + if (config().has("keeper_server.server_id")) + { +#if USE_NURAFT + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. + bool can_initialize_keeper_async = false; + + if (has_zookeeper) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. + can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } + /// Initialize keeper RAFT. + global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); + + auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & + { + return global_context->getConfigRef(); + }; + + for (const auto & listen_host : listen_hosts) + { + /// TCP Keeper + const char * port_name = "keeper_server.tcp_port"; + createServer( + config(), listen_host, port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config(), socket, listen_host, port); + socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); + return ProtocolServerAdapter( + listen_host, + port_name, + "Keeper (tcp): " + address.toString(), + std::make_unique<TCPServer>( + new KeeperTCPHandlerFactory( + config_getter, global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), + false), server_pool, socket)); + }); + + const char * secure_port_name = "keeper_server.tcp_port_secure"; + createServer( + config(), listen_host, secure_port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); + return ProtocolServerAdapter( + listen_host, + secure_port_name, + "Keeper with secure protocol (tcp_secure): " + address.toString(), + std::make_unique<TCPServer>( + new KeeperTCPHandlerFactory( + config_getter, global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket)); +#else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + }); + + /// HTTP control endpoints + port_name = "keeper_server.http_control.port"; + createServer(config(), listen_host, port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { + auto http_context = httpContext(); + Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(http_context->getReceiveTimeout()); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config(), socket, listen_host, port); + socket.setReceiveTimeout(http_context->getReceiveTimeout()); + socket.setSendTimeout(http_context->getSendTimeout()); + return ProtocolServerAdapter( + listen_host, + port_name, + "HTTP Control: http://" + address.toString(), + std::make_unique<HTTPServer>( + std::move(http_context), + createKeeperHTTPControlMainHandlerFactory( + config_getter(), + global_context->getKeeperDispatcher(), + "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)); + }); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif + + } + + { + std::lock_guard lock(servers_lock); + /// We should start interserver communications before (and more important shutdown after) tables. + /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. + /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can + /// communicate with zookeeper, execute merges, etc. + createInterserverServers( + config(), + interserver_listen_hosts, + listen_try, + server_pool, + async_metrics, + servers_to_start_before_tables, + /* start_servers= */ false); + + + for (auto & server : servers_to_start_before_tables) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } + } /// Initialize access storages. auto & access_control = global_context->getAccessControl(); @@ -1498,18 +1799,19 @@ try global_context->setStopServersCallback([&](const ServerType & server_type) { std::lock_guard lock(servers_lock); - servers.stopServers(server_type); + stopServers(servers, server_type); }); global_context->setStartServersCallback([&](const ServerType & server_type) { std::lock_guard lock(servers_lock); - servers.createServers( + createServers( config(), - *this, - servers_lock, + listen_hosts, + listen_try, server_pool, async_metrics, + servers, /* start_servers= */ true, server_type); }); @@ -1722,21 +2024,18 @@ try { std::lock_guard lock(servers_lock); - servers.createServers( - config(), - *this, - servers_lock, - server_pool, - async_metrics, - false, - ServerType(ServerType::Type::QUERIES_ALL)); + createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); if (servers.empty()) - throw Exception( - ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "No servers started (add valid listen_host and 'tcp_port' " - "or 'http_port' to configuration file.)"); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " + "to configuration file.)"); } + if (servers.empty()) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " + "to configuration file.)"); + #if USE_SSL CertificateReloader::instance().tryLoad(config()); #endif @@ -1808,7 +2107,12 @@ try { std::lock_guard lock(servers_lock); - servers.startServers(); + for (auto & server : servers) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } + + global_context->setServerCompletelyStarted(); LOG_INFO(log, "Ready for connections."); } @@ -1844,10 +2148,46 @@ try access_control.stopPeriodicReloading(); is_cancelled = true; - const auto remaining_connections = servers.stopServers(server_settings, servers_lock); + + LOG_DEBUG(log, "Waiting for current connections to close."); + + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_WARNING(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(log, "Closed all listening sockets."); + + /// Wait for unfinished backups and restores. + /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries + /// (because killAllQueries() will cancel all running backups/restores). + if (server_settings.shutdown_wait_backups_and_restores) + global_context->waitAllBackupsAndRestores(); + + /// Killing remaining queries. + if (!server_settings.shutdown_wait_unfinished_queries) + global_context->getProcessList().killAllQueries(); + + if (current_connections) + current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_WARNING(log, "Closed connections. But {} remain." + " Tip: To increase wait time add to config: <shutdown_wait_unfinished>60</shutdown_wait_unfinished>", current_connections); + else + LOG_INFO(log, "Closed connections."); + dns_cache_updater.reset(); - if (remaining_connections) + if (current_connections) { /// There is no better way to force connections to close in Poco. /// Otherwise connection handlers will continue to live @@ -1881,4 +2221,561 @@ catch (...) return code ? code : -1; } +std::unique_ptr<TCPProtocolStackFactory> Server::buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure) +{ + auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr + { + if (type == "tcp") + return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); + + if (type == "tls") +#if USE_SSL + return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + + if (type == "proxy1") + return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); + if (type == "mysql") + return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); + if (type == "postgres") + return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); + if (type == "http") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes) + ); + if (type == "prometheus") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes) + ); + if (type == "interserver") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes) + ); + + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); + }; + + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + std::unordered_set<std::string> pset {conf_name}; + + auto stack = std::make_unique<TCPProtocolStackFactory>(*this, conf_name); + + while (true) + { + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + + return stack; +} + +HTTPContextPtr Server::httpContext() const +{ + return std::make_shared<HTTPContext>(context()); +} + +void Server::createServers( + Poco::Util::AbstractConfiguration & config, + const Strings & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector<ProtocolServerAdapter> & servers, + bool start_servers, + const ServerType & server_type) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + + for (const auto & protocol : protocols) + { + if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + continue; + + std::string prefix = "protocols." + protocol + "."; + std::string port_name = prefix + "port"; + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + + if (!config.has(prefix + "port")) + continue; + + std::vector<std::string> hosts; + if (config.has(prefix + "host")) + hosts.push_back(config.getString(prefix + "host")); + else + hosts = listen_hosts; + + for (const auto & host : hosts) + { + bool is_secure = false; + auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); + + if (stack->empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + + createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port, is_secure); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique<TCPServer>( + stack.release(), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } + } + + for (const auto & listen_host : listen_hosts) + { + const char * port_name; + + if (server_type.shouldStart(ServerType::Type::HTTP)) + { + /// HTTP + port_name = "http_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique<HTTPServer>( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); + }); + } + + if (server_type.shouldStart(ServerType::Type::HTTPS)) + { + /// HTTPS + port_name = "https_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique<HTTPServer>( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); +#else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); +#endif + }); + } + + if (server_type.shouldStart(ServerType::Type::TCP)) + { + /// TCP + port_name = "tcp_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return
ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } + + if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) + { + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + port_name = "tcp_with_proxy_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } + + if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) + { + /// TCP with SSL + port_name = "tcp_port_secure"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + #if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + #else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + #endif + }); + } + + if (server_type.shouldStart(ServerType::Type::MYSQL)) + { + port_name = "mysql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } + + if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) + { + port_name = "postgresql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return 
ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } + +#if USE_GRPC + if (server_type.shouldStart(ServerType::Type::GRPC)) + { + port_name = "grpc_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); + }); + } +#endif + if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) + { + /// Prometheus (if defined and not setup yet with http_port) + port_name = "prometheus.port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes)); + }); + } + } +} + +void Server::createInterserverServers( + Poco::Util::AbstractConfiguration & config, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers, + const ServerType & server_type) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + + /// Now iterate over interserver_listen_hosts + for (const auto & interserver_listen_host : interserver_listen_hosts) + { + const char * port_name; + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) + { + /// Interserver IO HTTP + port_name = "interserver_http_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); + }); + } + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) + { + port_name = "interserver_https_port"; + createServer(config, interserver_listen_host, 
port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); +#else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + }); + } + } +} + +void Server::stopServers( + std::vector & servers, + const ServerType & server_type +) const +{ + LoggerRawPtr log = &logger(); + + /// Remove servers once all their connections are closed + auto check_server = [&log](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG(log, "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); + + for (auto & server : servers) + { + if (!server.isStopping()) + { + const std::string server_port_name = server.getPortName(); + + if (server_type.shouldStop(server_port_name)) + server.stop(); + } + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + +void Server::updateServers( + Poco::Util::AbstractConfiguration & config, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + std::vector & servers_to_start_before_tables) +{ + LoggerRawPtr log = &logger(); + + const auto listen_hosts = getListenHosts(config); + const auto interserver_listen_hosts = getInterserverListenHosts(config); + const auto listen_try = getListenTry(config); + + /// Remove servers once all their connections are closed + auto check_server = [&log](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG(log, "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); + + Poco::Util::AbstractConfiguration & previous_config = latest_config ? 
*latest_config : this->config(); + + std::vector all_servers; + all_servers.reserve(servers.size() + servers_to_start_before_tables.size()); + for (auto & server : servers) + all_servers.push_back(&server); + + for (auto & server : servers_to_start_before_tables) + all_servers.push_back(&server); + + for (auto * server : all_servers) + { + if (!server->isStopping()) + { + std::string port_name = server->getPortName(); + bool has_host = false; + bool is_http = false; + if (port_name.starts_with("protocols.")) + { + std::string protocol = port_name.substr(0, port_name.find_last_of('.')); + has_host = config.has(protocol + ".host"); + + std::string conf_name = protocol; + std::string prefix = protocol + "."; + std::unordered_set pset {conf_name}; + while (true) + { + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "http") + { + is_http = true; + break; + } + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + } + else + { + /// NOTE: better to compare using getPortName() over using + /// dynamic_cast<> since HTTPServer is also used for prometheus and + /// internal replication communications. + is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port"; + } + + if (!has_host) + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end(); + bool has_port = !config.getString(port_name, "").empty(); + bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); + if (force_restart) + LOG_TRACE(log, " had been changed, will reload {}", server->getDescription()); + + if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart) + { + server->stop(); + LOG_INFO(log, "Stopped listening for {}", server->getDescription()); + } + } + } + + createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true); + + std::erase_if(servers, std::bind_front(check_server, "")); + std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, "")); +} + } diff --git a/programs/server/Server.h b/programs/server/Server.h index b4931ce53d1..3f03dd137ef 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -1,10 +1,15 @@ #pragma once #include + #include +#include +#include +#include +#include /** Server provides three interfaces: - * 1. HTTP, GRPC - simple interfaces for any applications. + * 1. HTTP - simple interface for any applications. * 2. TCP - interface for native clickhouse-client and for server to server internal communications. * More rich and efficient, but less compatible * - data is transferred by columns; @@ -13,21 +18,43 @@ * 3. Interserver HTTP - for replication. 
*/ +namespace Poco +{ + namespace Net + { + class ServerSocket; + } +} + namespace DB { +class AsynchronousMetrics; +class ProtocolServerAdapter; class Server : public BaseDaemon, public IServer { public: using ServerApplication::run; - Poco::Util::LayeredConfiguration & config() const override { return BaseDaemon::config(); } + Poco::Util::LayeredConfiguration & config() const override + { + return BaseDaemon::config(); + } - Poco::Logger & logger() const override { return BaseDaemon::logger(); } + Poco::Logger & logger() const override + { + return BaseDaemon::logger(); + } - ContextMutablePtr context() const override { return global_context; } + ContextMutablePtr context() const override + { + return global_context; + } - bool isCancelled() const override { return BaseDaemon::isCancelled(); } + bool isCancelled() const override + { + return BaseDaemon::isCancelled(); + } void defineOptions(Poco::Util::OptionSet & _options) override; @@ -46,6 +73,64 @@ private: ContextMutablePtr global_context; /// Updated/recent config, to compare http_handlers ConfigurationPtr latest_config; + + HTTPContextPtr httpContext() const; + + Poco::Net::SocketAddress socketBindListen( + const Poco::Util::AbstractConfiguration & config, + Poco::Net::ServerSocket & socket, + const std::string & host, + UInt16 port, + [[maybe_unused]] bool secure = false) const; + + std::unique_ptr buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure); + + using CreateServerFunc = std::function; + void createServer( + Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + bool listen_try, + bool start_server, + std::vector & servers, + CreateServerFunc && func) const; + + void createServers( + Poco::Util::AbstractConfiguration & config, + const Strings & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); + + void createInterserverServers( + Poco::Util::AbstractConfiguration & config, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); + + void updateServers( + Poco::Util::AbstractConfiguration & config, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + std::vector & servers_to_start_before_tables); + + void stopServers( + std::vector & servers, + const ServerType & server_type + ) const; }; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 33042fbc7fc..f2e10a27b75 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -236,7 +236,6 @@ add_object_library(clickhouse_client Client) add_object_library(clickhouse_bridge BridgeHelper) add_object_library(clickhouse_server Server) add_object_library(clickhouse_server_http Server/HTTP) -add_object_library(clickhouse_server_manager Server/ServersManager) add_object_library(clickhouse_formats Formats) add_object_library(clickhouse_processors Processors) add_object_library(clickhouse_processors_executors Processors/Executors) diff --git a/src/Server/ServersManager/IServersManager.cpp 
b/src/Server/ServersManager/IServersManager.cpp deleted file mode 100644 index 8b1eee94303..00000000000 --- a/src/Server/ServersManager/IServersManager.cpp +++ /dev/null @@ -1,268 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int NETWORK_ERROR; -extern const int INVALID_CONFIG_PARAMETER; -} - -IServersManager::IServersManager(ContextMutablePtr global_context_, Poco::Logger * logger_) - : global_context(global_context_), logger(logger_) -{ -} - - -bool IServersManager::empty() const -{ - return servers.empty(); -} - -std::vector IServersManager::getMetrics() const -{ - std::vector metrics; - metrics.reserve(servers.size()); - for (const auto & server : servers) - metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - return metrics; -} - -void IServersManager::startServers() -{ - for (auto & server : servers) - { - server.start(); - LOG_INFO(logger, "Listening for {}", server.getDescription()); - } -} - -void IServersManager::stopServers(const ServerType & server_type) -{ - /// Remove servers once all their connections are closed - auto check_server = [&](const char prefix[], auto & server) - { - if (!server.isStopping()) - return false; - size_t current_connections = server.currentConnections(); - LOG_DEBUG( - logger, - "Server {}{}: {} ({} connections)", - server.getDescription(), - prefix, - !current_connections ? "finished" : "waiting", - current_connections); - return !current_connections; - }; - - std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); - - for (auto & server : servers) - { - if (!server.isStopping() && server_type.shouldStop(server.getPortName())) - server.stop(); - } - - std::erase_if(servers, std::bind_front(check_server, "")); -} - -void IServersManager::updateServers( - const Poco::Util::AbstractConfiguration & config, - IServer & iserver, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - ConfigurationPtr latest_config) -{ - stopServersForUpdate(config, latest_config); - createServers(config, iserver, servers_lock, server_pool, async_metrics, true, ServerType(ServerType::Type::QUERIES_ALL)); -} - -Poco::Net::SocketAddress IServersManager::socketBindListen( - const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const -{ - auto address = makeSocketAddress(host, port, logger); - socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); - /// If caller requests any available port from the OS, discover it after binding. - if (port == 0) - { - address = socket.address(); - LOG_DEBUG(logger, "Requested any available port (port == 0), actual port is {:d}", address.port()); - } - - socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); - return address; -} - -void IServersManager::createServer( - const Poco::Util::AbstractConfiguration & config, - const std::string & listen_host, - const char * port_name, - bool start_server, - CreateServerFunc && func) -{ - /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. 
- if (config.getString(port_name, "").empty()) - return; - - /// If we already have an active server for this listen_host/port_name, don't create it again - for (const auto & server : servers) - { - if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) - return; - } - - auto port = config.getInt(port_name); - try - { - servers.push_back(func(port)); - if (start_server) - { - servers.back().start(); - LOG_INFO(logger, "Listening for {}", servers.back().getDescription()); - } - global_context->registerServerPort(port_name, port); - } - catch (const Poco::Exception &) - { - if (!getListenTry(config)) - { - throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); - } - LOG_WARNING( - logger, - "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " - "then consider to " - "specify not disabled IPv4 or IPv6 address to listen in element of configuration " - "file. Example for disabled IPv6: 0.0.0.0 ." - " Example for disabled IPv4: ::", - listen_host, - port, - getCurrentExceptionMessage(false)); - } -} - -void IServersManager::stopServersForUpdate(const Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config) -{ - /// Remove servers once all their connections are closed - auto check_server = [&](const char prefix[], auto & server) - { - if (!server.isStopping()) - return false; - size_t current_connections = server.currentConnections(); - LOG_DEBUG( - logger, - "Server {}{}: {} ({} connections)", - server.getDescription(), - prefix, - !current_connections ? "finished" : "waiting", - current_connections); - return !current_connections; - }; - - std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); - - const auto listen_hosts = getListenHosts(config); - const Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : config; - - for (auto & server : servers) - { - if (server.isStopping()) - return; - std::string port_name = server.getPortName(); - bool has_host = false; - bool is_http = false; - if (port_name.starts_with("protocols.")) - { - std::string protocol = port_name.substr(0, port_name.find_last_of('.')); - has_host = config.has(protocol + ".host"); - - std::string conf_name = protocol; - std::string prefix = protocol + "."; - std::unordered_set pset{conf_name}; - while (true) - { - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "http") - { - is_http = true; - break; - } - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception( - ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } - } - else - { - /// NOTE: better to compare using getPortName() over using - /// dynamic_cast<> since HTTPServer is also used for prometheus and - /// internal replication communications. 
- is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; - } - - if (!has_host) - has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); - bool has_port = !config.getString(port_name, "").empty(); - bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); - if (force_restart) - LOG_TRACE(logger, " had been changed, will reload {}", server.getDescription()); - - if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart) - { - server.stop(); - LOG_INFO(logger, "Stopped listening for {}", server.getDescription()); - } - } - - std::erase_if(servers, std::bind_front(check_server, "")); -} - -Strings IServersManager::getListenHosts(const Poco::Util::AbstractConfiguration & config) const -{ - auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); - if (listen_hosts.empty()) - { - listen_hosts.emplace_back("::1"); - listen_hosts.emplace_back("127.0.0.1"); - } - return listen_hosts; -} - -bool IServersManager::getListenTry(const Poco::Util::AbstractConfiguration & config) const -{ - bool listen_try = config.getBool("listen_try", false); - if (!listen_try) - { - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - listen_try = DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() - && std::none_of( - protocols.begin(), - protocols.end(), - [&](const auto & protocol) - { return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); }); - } - return listen_try; -} - -} diff --git a/src/Server/ServersManager/IServersManager.h b/src/Server/ServersManager/IServersManager.h deleted file mode 100644 index 7e1d9d50d82..00000000000 --- a/src/Server/ServersManager/IServersManager.h +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class IServersManager -{ -public: - IServersManager(ContextMutablePtr global_context_, Poco::Logger * logger_); - virtual ~IServersManager() = default; - - bool empty() const; - std::vector getMetrics() const; - - virtual void createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) - = 0; - - void startServers(); - - void stopServers(const ServerType & server_type); - virtual size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) = 0; - - virtual void updateServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - ConfigurationPtr latest_config); - -protected: - ContextMutablePtr global_context; - Poco::Logger * logger; - - std::vector servers; - - Poco::Net::SocketAddress socketBindListen( - const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const; - - using CreateServerFunc = std::function; - void createServer( - const Poco::Util::AbstractConfiguration & config, - const std::string & listen_host, - const char * port_name, - bool start_server, - CreateServerFunc && func); - - void stopServersForUpdate(const Poco::Util::AbstractConfiguration & 
config, ConfigurationPtr latest_config); - - Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) const; - bool getListenTry(const Poco::Util::AbstractConfiguration & config) const; -}; - -} diff --git a/src/Server/ServersManager/InterServersManager.cpp b/src/Server/ServersManager/InterServersManager.cpp deleted file mode 100644 index 4425d468248..00000000000 --- a/src/Server/ServersManager/InterServersManager.cpp +++ /dev/null @@ -1,327 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if USE_SSL -# include -#endif - -#if USE_NURAFT -# include -# include -#endif - -namespace ProfileEvents -{ -extern const Event InterfaceInterserverSendBytes; -extern const Event InterfaceInterserverReceiveBytes; -} - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int SUPPORT_IS_DISABLED; -} - -void InterServersManager::createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) -{ - if (config.has("keeper_server.server_id")) - { -#if USE_NURAFT - //// If we don't have configured connection probably someone trying to use clickhouse-server instead - //// of clickhouse-keeper, so start synchronously. - bool can_initialize_keeper_async = false; - - if (zkutil::hasZooKeeperConfig(config)) /// We have configured connection to some zookeeper cluster - { - /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start - /// synchronously. - can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); - } - /// Initialize keeper RAFT. 
- global_context->initializeKeeperDispatcher(can_initialize_keeper_async); - FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); - - auto config_getter = [this]() -> const Poco::Util::AbstractConfiguration & { return global_context->getConfigRef(); }; - - for (const auto & listen_host : getListenHosts(config)) - { - /// TCP Keeper - constexpr auto port_name = "keeper_server.tcp_port"; - createServer( - config, - listen_host, - port_name, - /* start_server = */ false, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout( - Poco::Timespan(config.getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); - socket.setSendTimeout( - Poco::Timespan(config.getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); - return ProtocolServerAdapter( - listen_host, - port_name, - "Keeper (tcp): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory( - config_getter, - global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout.totalSeconds(), - global_context->getSettingsRef().send_timeout.totalSeconds(), - false), - server_pool, - socket)); - }); - - constexpr auto secure_port_name = "keeper_server.tcp_port_secure"; - createServer( - config, - listen_host, - secure_port_name, - /* start_server = */ false, - [&](UInt16 port) -> ProtocolServerAdapter - { -# if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout( - Poco::Timespan(config.getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); - socket.setSendTimeout( - Poco::Timespan(config.getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); - return ProtocolServerAdapter( - listen_host, - secure_port_name, - "Keeper with secure protocol (tcp_secure): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory( - config_getter, - global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout.totalSeconds(), - global_context->getSettingsRef().send_timeout.totalSeconds(), - true), - server_pool, - socket)); -# else - UNUSED(port); - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -# endif - }); - - /// HTTP control endpoints - createServer( - config, - listen_host, - /* port_name = */ "keeper_server.http_control.port", - /* start_server = */ false, - [&](UInt16 port) -> ProtocolServerAdapter - { - auto http_context = std::make_shared(global_context); - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(http_context->getReceiveTimeout()); - http_params->setKeepAliveTimeout(keep_alive_timeout); - - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(http_context->getReceiveTimeout()); - socket.setSendTimeout(http_context->getSendTimeout()); - return ProtocolServerAdapter( - listen_host, - port_name, - "HTTP Control: http://" + address.toString(), - std::make_unique( - std::move(http_context), - createKeeperHTTPControlMainHandlerFactory( - config_getter(), 
global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), - server_pool, - socket, - http_params)); - }); - } -#else - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); -#endif - } - - { - std::lock_guard lock(servers_lock); - /// We should start interserver communications before (and more important shutdown after) tables. - /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. - /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can - /// communicate with zookeeper, execute merges, etc. - createInterserverServers(config, server, server_pool, async_metrics, start_servers, server_type); - startServers(); - } -} - -size_t InterServersManager::stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) -{ - if (servers.empty()) - { - return 0; - } - - LOG_DEBUG(logger, "Waiting for current connections to servers for tables to finish."); - - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - LOG_INFO(logger, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(logger, "Closed all listening sockets."); - - if (current_connections > 0) - current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_INFO( - logger, - "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections " - "after context shutdown.", - current_connections); - else - LOG_INFO(logger, "Closed connections to servers for tables."); - return current_connections; -} - -void InterServersManager::updateServers( - const Poco::Util::AbstractConfiguration & config, - IServer & iserver, - std::mutex & /*servers_lock*/, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - ConfigurationPtr latest_config) -{ - stopServersForUpdate(config, latest_config); - createInterserverServers(config, iserver, server_pool, async_metrics, true, ServerType(ServerType::Type::QUERIES_ALL)); -} - -Strings InterServersManager::getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) const -{ - auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host"); - if (!interserver_listen_hosts.empty()) - return interserver_listen_hosts; - - /// Use more general restriction in case of emptiness - return getListenHosts(config); -} - -void InterServersManager::createInterserverServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) -{ - const Settings & settings = global_context->getSettingsRef(); - - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); - - /// Now iterate over interserver_listen_hosts - for (const auto & interserver_listen_host : getInterserverListenHosts(config)) - { - if 
(server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) - { - /// Interserver IO HTTP - constexpr auto port_name = "interserver_http_port"; - createServer( - config, - interserver_listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "replica communication (interserver): http://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); - }); - } - - if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) - { - constexpr auto port_name = "interserver_https_port"; - createServer( - config, - interserver_listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "secure replica communication (interserver): https://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); -#else - UNUSED(port); - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - } -} - -} diff --git a/src/Server/ServersManager/InterServersManager.h b/src/Server/ServersManager/InterServersManager.h deleted file mode 100644 index 8780eae18e0..00000000000 --- a/src/Server/ServersManager/InterServersManager.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class InterServersManager : public IServersManager -{ -public: - using IServersManager::IServersManager; - - void createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) override; - - size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; - - void updateServers( - const Poco::Util::AbstractConfiguration & config, - IServer & iserver, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - ConfigurationPtr latest_config) override; - -private: - Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) const; - - void createInterserverServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type); -}; - -} diff --git a/src/Server/ServersManager/ProtocolServersManager.cpp 
b/src/Server/ServersManager/ProtocolServersManager.cpp deleted file mode 100644 index af57de3ac3c..00000000000 --- a/src/Server/ServersManager/ProtocolServersManager.cpp +++ /dev/null @@ -1,523 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if USE_SSL -# include -#endif - -#if USE_GRPC -# include -#endif - -namespace ProfileEvents -{ -extern const Event InterfaceNativeSendBytes; -extern const Event InterfaceNativeReceiveBytes; -extern const Event InterfaceHTTPSendBytes; -extern const Event InterfaceHTTPReceiveBytes; -extern const Event InterfacePrometheusSendBytes; -extern const Event InterfacePrometheusReceiveBytes; -extern const Event InterfaceMySQLSendBytes; -extern const Event InterfaceMySQLReceiveBytes; -extern const Event InterfacePostgreSQLSendBytes; -extern const Event InterfacePostgreSQLReceiveBytes; -extern const Event InterfaceInterserverSendBytes; -extern const Event InterfaceInterserverReceiveBytes; -} - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int SUPPORT_IS_DISABLED; -extern const int INVALID_CONFIG_PARAMETER; -} - -void ProtocolServersManager::createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & /*servers_lock*/, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) -{ - auto listen_hosts = getListenHosts(config); - const Settings & settings = global_context->getSettingsRef(); - - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); - - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - - for (const auto & protocol : protocols) - { - if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) - continue; - - std::string prefix = "protocols." 
+ protocol + "."; - std::string port_name = prefix + "port"; - std::string description{" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - - if (!config.has(prefix + "port")) - continue; - - std::vector hosts; - if (config.has(prefix + "host")) - hosts.push_back(config.getString(prefix + "host")); - else - hosts = listen_hosts; - - for (const auto & host : hosts) - { - bool is_secure = false; - auto stack = buildProtocolStackFromConfig(config, server, protocol, http_params, async_metrics, is_secure); - - if (stack->empty()) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); - - createServer( - config, - host, - port_name.c_str(), - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - host, - port_name.c_str(), - description + ": " + address.toString(), - std::make_unique(stack.release(), server_pool, socket, new Poco::Net::TCPServerParams)); - }); - } - } - - for (const auto & listen_host : listen_hosts) - { - if (server_type.shouldStart(ServerType::Type::HTTP)) - { - /// HTTP - constexpr auto port_name = "http_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "http://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "HTTPHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceHTTPReceiveBytes, - ProfileEvents::InterfaceHTTPSendBytes)); - }); - } - - if (server_type.shouldStart(ServerType::Type::HTTPS)) - { - /// HTTPS - constexpr auto port_name = "https_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "https://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "HTTPSHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceHTTPReceiveBytes, - ProfileEvents::InterfaceHTTPSendBytes)); -#else - UNUSED(port); - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "HTTPS protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP)) - { - /// TCP - constexpr auto port_name = "tcp_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return 
ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp): " + address.toString(), - std::make_unique( - new TCPHandlerFactory( - server, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) - { - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - constexpr auto port_name = "tcp_with_proxy_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp) with PROXY: " + address.toString(), - std::make_unique( - new TCPHandlerFactory( - server, false, true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) - { - /// TCP with SSL - constexpr auto port_name = "tcp_port_secure"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPHandlerFactory( - server, true, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); -#else - UNUSED(port); - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - - if (server_type.shouldStart(ServerType::Type::MYSQL)) - { - constexpr auto port_name = "mysql_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "MySQL compatibility protocol: " + address.toString(), - std::make_unique( - new MySQLHandlerFactory( - server, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) - { - constexpr auto port_name = "postgresql_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "PostgreSQL compatibility protocol: " + address.toString(), - 
std::make_unique( - new PostgreSQLHandlerFactory( - server, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - -#if USE_GRPC - if (server_type.shouldStart(ServerType::Type::GRPC)) - { - constexpr auto port_name = "grpc_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::SocketAddress server_address(listen_host, port); - return ProtocolServerAdapter( - listen_host, - port_name, - "gRPC protocol: " + server_address.toString(), - std::make_unique(server, makeSocketAddress(listen_host, port, logger))); - }); - } -#endif - if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) - { - /// Prometheus (if defined and not setup yet with http_port) - constexpr auto port_name = "prometheus.port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Prometheus: http://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "PrometheusHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfacePrometheusReceiveBytes, - ProfileEvents::InterfacePrometheusSendBytes)); - }); - } - } -} - -size_t ProtocolServersManager::stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) -{ - if (servers.empty()) - { - return 0; - } - - LOG_DEBUG(logger, "Waiting for current connections to close."); - - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - LOG_WARNING(logger, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(logger, "Closed all listening sockets."); - - /// Wait for unfinished backups and restores. - /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries - /// (because killAllQueries() will cancel all running backups/restores). - if (server_settings.shutdown_wait_backups_and_restores) - global_context->waitAllBackupsAndRestores(); - /// Killing remaining queries. - if (!server_settings.shutdown_wait_unfinished_queries) - global_context->getProcessList().killAllQueries(); - - if (current_connections) - current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_WARNING( - logger, - "Closed connections. But {} remain." 
- " Tip: To increase wait time add to config: 60", - current_connections); - else - LOG_INFO(logger, "Closed connections."); - return current_connections; -} - -std::unique_ptr ProtocolServersManager::buildProtocolStackFromConfig( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - const std::string & protocol, - Poco::Net::HTTPServerParams::Ptr http_params, - AsynchronousMetrics & async_metrics, - bool & is_secure) const -{ - auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr - { - if (type == "tcp") - return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory( - server, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); - - if (type == "tls") -#if USE_SSL - return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(server, conf_name)); -#else - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - - if (type == "proxy1") - return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(server, conf_name)); - if (type == "mysql") - return TCPServerConnectionFactory::Ptr( - new MySQLHandlerFactory(server, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); - if (type == "postgres") - return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory( - server, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); - if (type == "http") - return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( - std::make_shared(global_context), - http_params, - createHandlerFactory(server, config, async_metrics, "HTTPHandler-factory"), - ProfileEvents::InterfaceHTTPReceiveBytes, - ProfileEvents::InterfaceHTTPSendBytes)); - if (type == "prometheus") - return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( - std::make_shared(global_context), - http_params, - createHandlerFactory(server, config, async_metrics, "PrometheusHandler-factory"), - ProfileEvents::InterfacePrometheusReceiveBytes, - ProfileEvents::InterfacePrometheusSendBytes)); - if (type == "interserver") - return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( - std::make_shared(global_context), - http_params, - createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPHandler-factory"), - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); - - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); - }; - - std::string conf_name = "protocols." + protocol; - std::string prefix = conf_name + "."; - std::unordered_set pset{conf_name}; - - auto stack = std::make_unique(server, conf_name); - - while (true) - { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; - } - - stack->append(create_factory(type, conf_name)); - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." 
+ config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception( - ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } - - return stack; -} - -} diff --git a/src/Server/ServersManager/ProtocolServersManager.h b/src/Server/ServersManager/ProtocolServersManager.h deleted file mode 100644 index e9eaaeb2184..00000000000 --- a/src/Server/ServersManager/ProtocolServersManager.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ - -class ProtocolServersManager : public IServersManager -{ -public: - using IServersManager::IServersManager; - - void createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) override; - - using IServersManager::stopServers; - size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; - -private: - std::unique_ptr buildProtocolStackFromConfig( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - const std::string & protocol, - Poco::Net::HTTPServerParams::Ptr http_params, - AsynchronousMetrics & async_metrics, - bool & is_secure) const; -}; - -} From c6660c70b17b8e3c1e22192b825deeb5f9f2120b Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 27 May 2024 10:27:50 +0200 Subject: [PATCH 642/651] Add missing reinterpret functions to documentation --- .../functions/type-conversion-functions.md | 617 +++++++++++++++++- 1 file changed, 611 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 1030d92c76b..2360cecb9a5 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -996,12 +996,585 @@ Result: └─────────────────────────────────────────────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64) +## reinterpretAsUInt8 -## reinterpretAsInt(8\|16\|32\|64) +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt8. -## reinterpretAsFloat* +**Syntax** +```sql +reinterpretAsUInt8(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt8. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt8. [UInt8](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toInt8(257) AS x, + toTypeName(x), + reinterpretAsUInt8(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ Int8 │ 1 │ UInt8 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt16 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt16. 
+
+**Syntax**
+
+```sql
+reinterpretAsUInt16(x)
+```
+
+**Parameters**
+
+- `x`: value to byte reinterpret as UInt16.
+
+:::note
+Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+:::
+
+**Returned value**
+
+- Reinterpreted value `x` as UInt16. [UInt16](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toUInt8(257) AS x,
+    toTypeName(x),
+    reinterpretAsUInt16(x) AS res,
+    toTypeName(res);
+```
+
+Result:
+
+```response
+┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
+│ 1 │ UInt8         │   1 │ UInt16          │
+└───┴───────────────┴─────┴─────────────────┘
+```
+
+## reinterpretAsUInt32
+
+Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt32.
+
+**Syntax**
+
+```sql
+reinterpretAsUInt32(x)
+```
+
+**Parameters**
+
+- `x`: value to byte reinterpret as UInt32.
+
+:::note
+Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+:::
+
+**Returned value**
+
+- Reinterpreted value `x` as UInt32. [UInt32](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toUInt16(257) AS x,
+    toTypeName(x),
+    reinterpretAsUInt32(x) AS res,
+    toTypeName(res)
+```
+
+Result:
+
+```response
+┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
+│ 257 │ UInt16        │ 257 │ UInt32          │
+└─────┴───────────────┴─────┴─────────────────┘
+```
+
+## reinterpretAsUInt64
+
+Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt64.
+
+**Syntax**
+
+```sql
+reinterpretAsUInt64(x)
+```
+
+**Parameters**
+
+- `x`: value to byte reinterpret as UInt64.
+
+:::note
+Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+:::
+
+**Returned value**
+
+- Reinterpreted value `x` as UInt64. [UInt64](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    toUInt32(257) AS x,
+    toTypeName(x),
+    reinterpretAsUInt64(x) AS res,
+    toTypeName(res)
+```
+
+Result:
+
+```response
+┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
+│ 257 │ UInt32        │ 257 │ UInt64          │
+└─────┴───────────────┴─────┴─────────────────┘
+```
+
+## reinterpretAsUInt128
+
+Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt128.
+
+**Syntax**
+
+```sql
+reinterpretAsUInt128(x)
+```
+
+**Parameters**
+
+- `x`: value to byte reinterpret as UInt128.
+ +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt128. [UInt128](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt64(257) AS x, + toTypeName(x), + reinterpretAsUInt128(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt64 │ 257 │ UInt128 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt256 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt256. + +**Syntax** + +```sql +reinterpretAsUInt256(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt256. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt256. [UInt256](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt128(257) AS x, + toTypeName(x), + reinterpretAsUInt256(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt128 │ 257 │ UInt256 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt8 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int8. + +**Syntax** + +```sql +reinterpretAsInt8(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int8. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int8. [Int8](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toUInt8(257) AS x, + toTypeName(x), + reinterpretAsInt8(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ UInt8 │ 1 │ Int8 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt16 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int16. + +**Syntax** + +```sql +reinterpretAsInt16(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int16. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
+::: + +**Returned value** + +- Reinterpreted value `x` as Int16. [Int16](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt8(257) AS x, + toTypeName(x), + reinterpretAsInt16(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ Int8 │ 1 │ Int16 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt32 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int32. + +**Syntax** + +```sql +reinterpretAsInt32(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int32. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int32. [Int32](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt16(257) AS x, + toTypeName(x), + reinterpretAsInt32(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int16 │ 257 │ Int32 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt64 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int64. + +**Syntax** + +```sql +reinterpretAsInt64(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int64. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int64. [Int64](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt32(257) AS x, + toTypeName(x), + reinterpretAsInt64(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int32 │ 257 │ Int64 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt128 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int128. + +**Syntax** + +```sql +reinterpretAsInt128(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int128. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int128. [Int128](../data-types/int-uint.md/#int-ranges). 
+ +**Example** + +Query: + +```sql +SELECT + toInt64(257) AS x, + toTypeName(x), + reinterpretAsInt128(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int64 │ 257 │ Int128 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt256 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int256. + +**Syntax** + +```sql +reinterpretAsInt256(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int256. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int256. [Int256](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt128(257) AS x, + toTypeName(x), + reinterpretAsInt256(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int128 │ 257 │ Int256 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsFloat32 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Float32. + +**Syntax** + +```sql +reinterpretAsFloat32(x) +``` + +**Parameters** + +- `x`: value to reinterpret as Float32. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Float32. [Float32](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT reinterpretAsUInt32(toFloat32(0.2)) as x, reinterpretAsFloat32(x); +``` + +Result: + +```response +┌──────────x─┬─reinterpretAsFloat32(x)─┐ +│ 1045220557 │ 0.2 │ +└────────────┴─────────────────────────┘ +``` + +## reinterpretAsFloat64 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Float64. + +**Syntax** + +```sql +reinterpretAsFloat64(x) +``` + +**Parameters** + +- `x`: value to reinterpret as Float64. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Float64. [Float64](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT reinterpretAsUInt64(toFloat64(0.2)) as x, reinterpretAsFloat64(x); +``` + +Result: + +```response +┌───────────────────x─┬─reinterpretAsFloat64(x)─┐ +│ 4596373779694328218 │ 0.2 │ +└─────────────────────┴─────────────────────────┘ +``` ## reinterpretAsDate @@ -1093,11 +1666,43 @@ Result: ## reinterpretAsString -This function accepts a number or date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). 
Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
+This function accepts a number, date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
+
+**Syntax**
+
+```sql
+reinterpretAsString(x)
+```
+
+**Parameters**
+
+- `x`: value to reinterpret as a string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md).
+
+**Returned value**
+
+- String containing bytes representing `x`. [String](../data-types/string.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    reinterpretAsString(toDateTime('1970-01-01 01:01:05')),
+    reinterpretAsString(toDate('1970-03-07'));
+```
+
+Result:
+
+```response
+┌─reinterpretAsString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsString(toDate('1970-03-07'))─┐
+│ A                                                      │ A                                         │
+└────────────────────────────────────────────────────────┴───────────────────────────────────────────┘
+```
 
 ## reinterpretAsFixedString
 
-This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
+This function accepts a number, date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
 
 **Syntax**
 
@@ -1137,7 +1742,7 @@ Result:
 In addition to the UUID functions listed here, there is dedicated [UUID function documentation](/docs/en/sql-reference/functions/uuid-functions.md).
 :::
 
-Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
+Accepts a 16 byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
**Syntax** From 8b551cc832a765296213ce462a5472d589b1955d Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 27 May 2024 10:30:18 +0200 Subject: [PATCH 643/651] Remove unneeded test file - one already exists --- .../03156_reinterpret_functions.sql | 36 ------------------- 1 file changed, 36 deletions(-) delete mode 100644 tests/queries/0_stateless/03156_reinterpret_functions.sql diff --git a/tests/queries/0_stateless/03156_reinterpret_functions.sql b/tests/queries/0_stateless/03156_reinterpret_functions.sql deleted file mode 100644 index 4acaaf47cef..00000000000 --- a/tests/queries/0_stateless/03156_reinterpret_functions.sql +++ /dev/null @@ -1,36 +0,0 @@ --- Date and DateTime - -SELECT reinterpretAsDate(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsDate('A',''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsDate([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} -SELECT reinterpretAsDateTime(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsDateTime('A',''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsDateTime([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} - -SELECT reinterpretAsDate(65); -SELECT reinterpretAsDate('A'); -SELECT reinterpretAsDateTime(65); -SELECT reinterpretAsDate('A'); - --- Fixed String - -SELECT reinterpretAsFixedString(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFixedString(toDate('1970-01-01'),''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFixedString([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} - -SELECT reinterpretAsFixedString(toDate('1970-03-07')); -SELECT reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')); -SELECT reinterpretAsFixedString(65); - --- Float32, Float64 - -SELECT reinterpretAsFloat32(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFloat64(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFloat32('1970-01-01', ''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFloat64('1970-01-01', ''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFloat32([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} -SELECT reinterpretAsFloat64([0, 1, 2]); -- { clientError4 ILLEGAL_TYPE_OF_ARGUMENT} - - - - From 5a868304c04755bb62b30c45e408b65a3e78dcd0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 27 May 2024 11:38:22 +0200 Subject: [PATCH 644/651] Revert "Remove some unnecessary `UNREACHABLE`s" --- programs/keeper-client/Commands.cpp | 3 +-- programs/main.cpp | 2 +- src/Access/AccessEntityIO.cpp | 3 ++- src/Access/AccessRights.cpp | 2 +- src/Access/IAccessStorage.cpp | 9 ++++++--- .../AggregateFunctionGroupArray.cpp | 13 +++++++------ .../AggregateFunctionSequenceNextNode.cpp | 1 + src/AggregateFunctions/AggregateFunctionSum.h | 1 + src/Common/DateLUTImpl.cpp | 1 + src/Common/IntervalKind.cpp | 10 ++++++++++ src/Common/TargetSpecific.cpp | 2 ++ src/Common/ThreadProfileEvents.cpp | 1 + src/Common/ZooKeeper/IKeeper.cpp | 2 ++ src/Compression/CompressionCodecDeflateQpl.cpp | 1 + src/Compression/CompressionCodecDoubleDelta.cpp | 10 +++------- src/Coordination/KeeperReconfiguration.cpp | 8 +------- src/Coordination/KeeperServer.cpp | 2 +- src/Core/Field.h | 2 ++ src/DataTypes/Serializations/ISerialization.cpp | 1 + src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 1 + .../MetadataStorageTransactionState.cpp | 1 + src/Disks/VolumeJBOD.cpp | 2 ++ 
src/Formats/EscapingRuleUtils.cpp | 1 + src/Functions/FunctionsRound.h | 8 ++++++++ src/Functions/FunctionsTimeWindow.cpp | 2 ++ src/Functions/PolygonUtils.h | 2 ++ .../UserDefinedSQLObjectsZooKeeperStorage.cpp | 1 + src/IO/CompressionMethod.cpp | 1 + src/IO/HadoopSnappyReadBuffer.h | 1 + src/Interpreters/AggregatedDataVariants.cpp | 8 ++++++++ src/Interpreters/Cache/FileSegment.cpp | 1 + src/Interpreters/ComparisonGraph.cpp | 1 + src/Interpreters/FilesystemCacheLog.cpp | 1 + src/Interpreters/HashJoin.cpp | 3 +++ src/Interpreters/HashJoin.h | 6 ++++++ .../InterpreterTransactionControlQuery.cpp | 1 + src/Interpreters/SetVariants.cpp | 4 ++++ src/Parsers/ASTExplainQuery.h | 2 ++ src/Parsers/Lexer.cpp | 4 +++- .../Formats/Impl/MsgPackRowInputFormat.cpp | 1 + src/Processors/IProcessor.cpp | 2 ++ src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 ++++++ src/Processors/QueryPlan/TotalsHavingStep.cpp | 2 ++ src/Processors/Transforms/FillingTransform.cpp | 1 + .../Transforms/buildPushingToViewsChain.cpp | 2 ++ src/Storages/MergeTree/BackgroundJobsAssignee.cpp | 1 + src/Storages/MergeTree/KeyCondition.cpp | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 ++ .../PartMovesBetweenShardsOrchestrator.cpp | 2 ++ src/Storages/WindowView/StorageWindowView.cpp | 3 +++ 51 files changed, 121 insertions(+), 30 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 860840a2d06..a109912e6e0 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -10,7 +10,6 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int KEEPER_EXCEPTION; } @@ -442,7 +441,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient new_members = query->args[1].safeGet(); break; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected operation: {}", operation); + UNREACHABLE(); } auto response = client->zookeeper->reconfig(joining, leaving, new_members); diff --git a/programs/main.cpp b/programs/main.cpp index c270388f17f..bc8476e4ce4 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -155,8 +155,8 @@ auto instructionFailToString(InstructionFail fail) ret("AVX2"); case InstructionFail::AVX512: ret("AVX512"); -#undef ret } + UNREACHABLE(); } diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index 1b073329296..b0dfd74c53b 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -144,7 +144,8 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String catch (Exception & e) { e.addMessage("Could not parse " + file_path); - throw; + e.rethrow(); + UNREACHABLE(); } } diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 2127f4ada70..c10931f554c 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -258,7 +258,7 @@ namespace case TABLE_LEVEL: return AccessFlags::allFlagsGrantableOnTableLevel(); case COLUMN_LEVEL: return AccessFlags::allFlagsGrantableOnColumnLevel(); } - chassert(false); + UNREACHABLE(); } } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8d4e7d3073e..8e51481e415 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -257,7 +257,8 @@ std::vector IAccessStorage::insert(const std::vector & mu } e.addMessage("After successfully inserting {}/{}: {}", successfully_inserted.size(), multiple_entities.size(), successfully_inserted_str); } - 
throw; + e.rethrow(); + UNREACHABLE(); } } @@ -360,7 +361,8 @@ std::vector IAccessStorage::remove(const std::vector & ids, bool thr } e.addMessage("After successfully removing {}/{}: {}", removed_names.size(), ids.size(), removed_names_str); } - throw; + e.rethrow(); + UNREACHABLE(); } } @@ -456,7 +458,8 @@ std::vector IAccessStorage::update(const std::vector & ids, const Up } e.addMessage("After successfully updating {}/{}: {}", names_of_updated.size(), ids.size(), names_of_updated_str); } - throw; + e.rethrow(); + UNREACHABLE(); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 930b2c6ce73..d4fb7afcb78 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -60,13 +60,14 @@ struct GroupArrayTrait template constexpr const char * getNameByTrait() { - if constexpr (Trait::last) + if (Trait::last) return "groupArrayLast"; - switch (Trait::sampler) - { - case Sampler::NONE: return "groupArray"; - case Sampler::RNG: return "groupArraySample"; - } + if (Trait::sampler == Sampler::NONE) + return "groupArray"; + else if (Trait::sampler == Sampler::RNG) + return "groupArraySample"; + + UNREACHABLE(); } template diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp index a9dd53a75e8..bed10333af0 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp @@ -414,6 +414,7 @@ public: break; return (i == events_size) ? base - i : unmatched_idx; } + UNREACHABLE(); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 2ce03c530c2..58aaddf357a 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -463,6 +463,7 @@ public: return "sumWithOverflow"; else if constexpr (Type == AggregateFunctionTypeSumKahan) return "sumKahan"; + UNREACHABLE(); } explicit AggregateFunctionSum(const DataTypes & argument_types_) diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index c87d44a4b95..392ee64dcbf 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -41,6 +41,7 @@ UInt8 getDayOfWeek(const cctz::civil_day & date) case cctz::weekday::saturday: return 6; case cctz::weekday::sunday: return 7; } + UNREACHABLE(); } inline cctz::time_point lookupTz(const cctz::time_zone & cctz_time_zone, const cctz::civil_day & date) diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 1548d5cf9a5..22c7db504c3 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -34,6 +34,8 @@ Int64 IntervalKind::toAvgNanoseconds() const default: return toAvgSeconds() * NANOSECONDS_PER_SECOND; } + + UNREACHABLE(); } Int32 IntervalKind::toAvgSeconds() const @@ -52,6 +54,7 @@ Int32 IntervalKind::toAvgSeconds() const case IntervalKind::Kind::Quarter: return 7889238; /// Exactly 1/4 of a year. 
case IntervalKind::Kind::Year: return 31556952; /// The average length of a Gregorian year is equal to 365.2425 days } + UNREACHABLE(); } Float64 IntervalKind::toSeconds() const @@ -77,6 +80,7 @@ Float64 IntervalKind::toSeconds() const default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not possible to get precise number of seconds in non-precise interval"); } + UNREACHABLE(); } bool IntervalKind::isFixedLength() const @@ -95,6 +99,7 @@ bool IntervalKind::isFixedLength() const case IntervalKind::Kind::Quarter: case IntervalKind::Kind::Year: return false; } + UNREACHABLE(); } IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds) @@ -136,6 +141,7 @@ const char * IntervalKind::toKeyword() const case IntervalKind::Kind::Quarter: return "QUARTER"; case IntervalKind::Kind::Year: return "YEAR"; } + UNREACHABLE(); } @@ -155,6 +161,7 @@ const char * IntervalKind::toLowercasedKeyword() const case IntervalKind::Kind::Quarter: return "quarter"; case IntervalKind::Kind::Year: return "year"; } + UNREACHABLE(); } @@ -185,6 +192,7 @@ const char * IntervalKind::toDateDiffUnit() const case IntervalKind::Kind::Year: return "year"; } + UNREACHABLE(); } @@ -215,6 +223,7 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const case IntervalKind::Kind::Year: return "toIntervalYear"; } + UNREACHABLE(); } @@ -248,6 +257,7 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const case IntervalKind::Kind::Year: return "toYear"; } + UNREACHABLE(); } diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index 8540c9a9986..49f396c0926 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -54,6 +54,8 @@ String toString(TargetArch arch) case TargetArch::AMXTILE: return "amxtile"; case TargetArch::AMXINT8: return "amxint8"; } + + UNREACHABLE(); } } diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 23b41f23bde..6a63d484cd9 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -75,6 +75,7 @@ const char * TasksStatsCounters::metricsProviderString(MetricsProvider provider) case MetricsProvider::Netlink: return "netlink"; } + UNREACHABLE(); } bool TasksStatsCounters::checkIfAvailable() diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 7cca262baca..7d2602bde1e 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -146,6 +146,8 @@ const char * errorMessage(Error code) case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; case Error::ZNOTREADONLY: return "State-changing request is passed to read-only server"; } + + UNREACHABLE(); } bool isHardwareError(Error zk_return_code) diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index f1b5b24e866..7e0653c69f8 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -466,6 +466,7 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); return; } + UNREACHABLE(); } void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests() diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index cbd8cd57a62..e6e8db4c699 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -21,11 +21,6 
@@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - /** NOTE DoubleDelta is surprisingly bad name. The only excuse is that it comes from an academic paper. * Most people will think that "double delta" is just applying delta transform twice. * But in fact it is something more than applying delta transform twice. @@ -147,9 +142,9 @@ namespace ErrorCodes { extern const int CANNOT_COMPRESS; extern const int CANNOT_DECOMPRESS; + extern const int BAD_ARGUMENTS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; - extern const int LOGICAL_ERROR; } namespace @@ -168,8 +163,9 @@ inline Int64 getMaxValueForByteSize(Int8 byte_size) case sizeof(UInt64): return std::numeric_limits::max(); default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "only 1, 2, 4 and 8 data sizes are supported"); + assert(false && "only 1, 2, 4 and 8 data sizes are supported"); } + UNREACHABLE(); } struct WriteSpec diff --git a/src/Coordination/KeeperReconfiguration.cpp b/src/Coordination/KeeperReconfiguration.cpp index 05211af6704..e3642913a7a 100644 --- a/src/Coordination/KeeperReconfiguration.cpp +++ b/src/Coordination/KeeperReconfiguration.cpp @@ -5,12 +5,6 @@ namespace DB { - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining) { ClusterUpdateActions out; @@ -85,7 +79,7 @@ String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateA new_config.emplace_back(RaftServerConfig{*cfg->get_server(priority->id)}); } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected update"); + UNREACHABLE(); } for (const auto & item : cfg->get_servers()) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 736a01443ce..8d21ce2ab01 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -990,7 +990,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return Accepted; } - std::unreachable(); + UNREACHABLE(); } ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) diff --git a/src/Core/Field.h b/src/Core/Field.h index 710614cd0a0..4424d669c4d 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -667,6 +667,8 @@ public: case Types::AggregateFunctionState: return f(field.template get()); case Types::CustomType: return f(field.template get()); } + + UNREACHABLE(); } String dump() const; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index bbb1d1a6cd1..dbe27a5f3f6 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -36,6 +36,7 @@ String ISerialization::kindToString(Kind kind) case Kind::SPARSE: return "Sparse"; } + UNREACHABLE(); } ISerialization::Kind ISerialization::stringToKind(const String & str) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index cb34f7932c3..3433698a162 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -140,6 +140,7 @@ private: case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: return "REMOTE_FS_READ_AND_PUT_IN_CACHE"; } + UNREACHABLE(); } size_t first_offset = 0; diff --git a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp 
b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp index a37f4ce7e65..245578b5d9e 100644 --- a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp @@ -17,6 +17,7 @@ std::string toString(MetadataStorageTransactionState state) case MetadataStorageTransactionState::PARTIALLY_ROLLED_BACK: return "PARTIALLY_ROLLED_BACK"; } + UNREACHABLE(); } } diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index f8b9a57affe..d0e9d32ff5e 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -112,6 +112,7 @@ DiskPtr VolumeJBOD::getDisk(size_t /* index */) const return disks_by_size.top().disk; } } + UNREACHABLE(); } ReservationPtr VolumeJBOD::reserve(UInt64 bytes) @@ -163,6 +164,7 @@ ReservationPtr VolumeJBOD::reserve(UInt64 bytes) return reservation; } } + UNREACHABLE(); } bool VolumeJBOD::areMergesAvoided() const diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 9577ca2a8df..89a7a31d033 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -62,6 +62,7 @@ String escapingRuleToString(FormatSettings::EscapingRule escaping_rule) case FormatSettings::EscapingRule::Raw: return "Raw"; } + UNREACHABLE(); } void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index dde57e8320d..99f3a14dfec 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -149,6 +149,8 @@ struct IntegerRoundingComputation return x; } } + + UNREACHABLE(); } static ALWAYS_INLINE T compute(T x, T scale) @@ -161,6 +163,8 @@ struct IntegerRoundingComputation case ScaleMode::Negative: return computeImpl(x, scale); } + + UNREACHABLE(); } static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) requires std::integral @@ -243,6 +247,8 @@ inline float roundWithMode(float x, RoundingMode mode) case RoundingMode::Ceil: return ceilf(x); case RoundingMode::Trunc: return truncf(x); } + + UNREACHABLE(); } inline double roundWithMode(double x, RoundingMode mode) @@ -254,6 +260,8 @@ inline double roundWithMode(double x, RoundingMode mode) case RoundingMode::Ceil: return ceil(x); case RoundingMode::Trunc: return trunc(x); } + + UNREACHABLE(); } template diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index f93a885ee65..1c9f28c9724 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -232,6 +232,7 @@ struct TimeWindowImpl default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } + UNREACHABLE(); } template @@ -421,6 +422,7 @@ struct TimeWindowImpl default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } + UNREACHABLE(); } template diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 57f1243537d..c4851718da6 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -381,6 +381,8 @@ bool PointInPolygonWithGrid::contains(CoordinateType x, Coordina case CellType::complexPolygon: return boost::geometry::within(Point(x, y), polygons[cell.index_of_inner_polygon]); } + + UNREACHABLE(); } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp 
index 766d63eafb0..568e0b9b5d2 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -35,6 +35,7 @@ namespace case UserDefinedSQLObjectType::Function: return "function_"; } + UNREACHABLE(); } constexpr std::string_view sql_extension = ".sql"; diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index 22913125e99..b8e1134d422 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -52,6 +52,7 @@ std::string toContentEncodingName(CompressionMethod method) case CompressionMethod::None: return ""; } + UNREACHABLE(); } CompressionMethod chooseHTTPCompressionMethod(const std::string & list) diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index bbbb84dd6dd..73e52f2c503 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -88,6 +88,7 @@ public: case Status::TOO_LARGE_COMPRESSED_BLOCK: return "TOO_LARGE_COMPRESSED_BLOCK"; } + UNREACHABLE(); } explicit HadoopSnappyReadBuffer( diff --git a/src/Interpreters/AggregatedDataVariants.cpp b/src/Interpreters/AggregatedDataVariants.cpp index 8f82f15248f..87cfdda5948 100644 --- a/src/Interpreters/AggregatedDataVariants.cpp +++ b/src/Interpreters/AggregatedDataVariants.cpp @@ -117,6 +117,8 @@ size_t AggregatedDataVariants::size() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } + + UNREACHABLE(); } size_t AggregatedDataVariants::sizeWithoutOverflowRow() const @@ -134,6 +136,8 @@ size_t AggregatedDataVariants::sizeWithoutOverflowRow() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } + + UNREACHABLE(); } const char * AggregatedDataVariants::getMethodName() const @@ -151,6 +155,8 @@ const char * AggregatedDataVariants::getMethodName() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } + + UNREACHABLE(); } bool AggregatedDataVariants::isTwoLevel() const @@ -168,6 +174,8 @@ bool AggregatedDataVariants::isTwoLevel() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } + + UNREACHABLE(); } bool AggregatedDataVariants::isConvertibleToTwoLevel() const diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 61a356fa3c3..9459029dc4c 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -799,6 +799,7 @@ String FileSegment::stateToString(FileSegment::State state) case FileSegment::State::DETACHED: return "DETACHED"; } + UNREACHABLE(); } bool FileSegment::assertCorrectness() const diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index d53ff4b0227..4eacbae7a30 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -309,6 +309,7 @@ ComparisonGraphCompareResult ComparisonGraph::pathToCompareResult(Path pat case Path::GREATER: return inverse ? ComparisonGraphCompareResult::LESS : ComparisonGraphCompareResult::GREATER; case Path::GREATER_OR_EQUAL: return inverse ? 
ComparisonGraphCompareResult::LESS_OR_EQUAL : ComparisonGraphCompareResult::GREATER_OR_EQUAL; } + UNREACHABLE(); } template diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index aa489351a98..80fe1c3a8ef 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -26,6 +26,7 @@ static String typeToString(FilesystemCacheLogElement::CacheType type) case FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE: return "WRITE_THROUGH_CACHE"; } + UNREACHABLE(); } ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 75da8bbc3e7..3a21c13db5e 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -705,6 +705,7 @@ namespace APPLY_FOR_JOIN_VARIANTS(M) #undef M } + UNREACHABLE(); } } @@ -2640,6 +2641,8 @@ private: default: throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type); } + + UNREACHABLE(); } template diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index a0996556f9a..86db8943926 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -322,6 +322,8 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } + + UNREACHABLE(); } size_t getTotalByteCountImpl(Type which) const @@ -336,6 +338,8 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } + + UNREACHABLE(); } size_t getBufferSizeInCells(Type which) const @@ -350,6 +354,8 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } + + UNREACHABLE(); } /// NOLINTEND(bugprone-macro-parentheses) }; diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index 13872fbe3f5..d31ace758c4 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -33,6 +33,7 @@ BlockIO InterpreterTransactionControlQuery::execute() case ASTTransactionControl::SET_SNAPSHOT: return executeSetSnapshot(session_context, tcl.snapshot); } + UNREACHABLE(); } BlockIO InterpreterTransactionControlQuery::executeBegin(ContextMutablePtr session_context) diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp index c600d096160..64796a013f1 100644 --- a/src/Interpreters/SetVariants.cpp +++ b/src/Interpreters/SetVariants.cpp @@ -41,6 +41,8 @@ size_t SetVariantsTemplate::getTotalRowCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } + + UNREACHABLE(); } template @@ -55,6 +57,8 @@ size_t SetVariantsTemplate::getTotalByteCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } + + UNREACHABLE(); } template diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index eb095b5dbbc..701bde8cebd 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -40,6 +40,8 @@ public: case TableOverride: return "EXPLAIN TABLE OVERRIDE"; case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION"; } + + UNREACHABLE(); } static ExplainKind fromString(const String & str) diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 5f2bd50524c..34855a7ce20 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -42,7 +42,7 @@ Token quotedString(const char *& pos, const char * const token_begin, const char continue; } - chassert(false); + UNREACHABLE(); } } @@ -538,6 +538,8 @@ const char * getTokenName(TokenType type) APPLY_FOR_TOKENS(M) #undef M } + + UNREACHABLE(); } diff --git 
a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 6b7f1f5206c..98cbdeaaa4b 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -657,6 +657,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type()); } } + UNREACHABLE(); } std::optional MsgPackSchemaReader::readRowAndGetDataTypes() diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 5ab5e5277aa..8b160153733 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -36,6 +36,8 @@ std::string IProcessor::statusToName(Status status) case Status::ExpandPipeline: return "ExpandPipeline"; } + + UNREACHABLE(); } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 24ea8c25fb6..6f0fa55c349 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1136,6 +1136,8 @@ static void addMergingFinal( return std::make_shared(header, num_outputs, sort_description, max_block_size_rows, /*max_block_size_bytes=*/0, merging_params.graphite_params, now); } + + UNREACHABLE(); }; pipe.addTransform(get_merging_processor()); @@ -2123,6 +2125,8 @@ static const char * indexTypeToString(ReadFromMergeTree::IndexType type) case ReadFromMergeTree::IndexType::Skip: return "Skip"; } + + UNREACHABLE(); } static const char * readTypeToString(ReadFromMergeTree::ReadType type) @@ -2138,6 +2142,8 @@ static const char * readTypeToString(ReadFromMergeTree::ReadType type) case ReadFromMergeTree::ReadType::ParallelReplicas: return "Parallel"; } + + UNREACHABLE(); } void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index ac5e144bf4a..d1bd70fd0b2 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -86,6 +86,8 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr case TotalsMode::AFTER_HAVING_AUTO: return "after_having_auto threshold " + std::to_string(auto_include_threshold); } + + UNREACHABLE(); } void TotalsHavingStep::describeActions(FormatSettings & settings) const diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index bb38c3e1dc5..05fd2a7254f 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -67,6 +67,7 @@ static FillColumnDescription::StepFunction getStepFunction( FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE } + UNREACHABLE(); } static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & type) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index a1a886fb4f7..cdcfad4442c 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -898,6 +898,8 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St { return std::current_exception(); } + + UNREACHABLE(); } void FinalizingViewsTransform::work() diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp 
b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp index 0a69bf1109f..56a4378cf9a 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp @@ -93,6 +93,7 @@ String BackgroundJobsAssignee::toString(Type type) case Type::Moving: return "Moving"; } + UNREACHABLE(); } void BackgroundJobsAssignee::start() diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 9666da574fb..bd8642b9f66 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -2964,6 +2964,8 @@ String KeyCondition::RPNElement::toString(std::string_view column_name, bool pri case ALWAYS_TRUE: return "true"; } + + UNREACHABLE(); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b6373a22d9c..4b3093eeaac 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1177,6 +1177,8 @@ String MergeTreeData::MergingParams::getModeName() const case Graphite: return "Graphite"; case VersionedCollapsing: return "VersionedCollapsing"; } + + UNREACHABLE(); } Int64 MergeTreeData::getMaxBlockNumber() const diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index df4087b8546..426e36ce9a9 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -360,6 +360,8 @@ Block MergeTreeDataWriter::mergeBlock( return std::make_shared( block, 1, sort_description, block_size + 1, /*block_size_bytes=*/0, merging_params.graphite_params, time(nullptr)); } + + UNREACHABLE(); }; auto merging_algorithm = get_merging_algorithm(); diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 4228d7b70b6..78fcfabb704 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -616,6 +616,8 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st } } } + + UNREACHABLE(); } void PartMovesBetweenShardsOrchestrator::removePins(const Entry & entry, zkutil::ZooKeeperPtr zk) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 8bca1c97aad..a9ec1f6c694 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -297,6 +297,7 @@ namespace CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } + UNREACHABLE(); } class AddingAggregatedChunkInfoTransform : public ISimpleTransform @@ -919,6 +920,7 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } + UNREACHABLE(); } UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) @@ -946,6 +948,7 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } + UNREACHABLE(); } void StorageWindowView::addFireSignal(std::set & signals) From c42338b8e0e4a8239fb34001860c9dba091e926a Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 27 May 2024 11:51:46 +0200 Subject: [PATCH 645/651] Fix test --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 69485bd4d01..823e272cf01 100644 --- 
a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -578,6 +578,7 @@ void S3ObjectStorage::applyNewSettings( auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); auto modified_settings = std::make_unique(*s3_settings.get()); modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings); + modified_settings->request_settings = settings_from_config->request_settings; if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName())) modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); From 0676b155de8ebbea9cd9f8dcafdfe2dc8a03abfc Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 27 May 2024 12:12:39 +0200 Subject: [PATCH 646/651] Remove logging --- src/Storages/ObjectStorage/ReadBufferIterator.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 5e89a0a1b9d..78cdc442f64 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -254,21 +254,17 @@ ReadBufferIterator::Data ReadBufferIterator::next() } } - LOG_TEST(getLogger("KSSENII"), "Will read columns from {}", current_object_info->getPath()); - std::unique_ptr read_buf; CompressionMethod compression_method; using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; if (const auto * object_info_in_archive = dynamic_cast(current_object_info.get())) { - LOG_TEST(getLogger("KSSENII"), "Will read columns from {} from archive", current_object_info->getPath()); compression_method = chooseCompressionMethod(filename, configuration->compression_method); const auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else { - LOG_TEST(getLogger("KSSENII"), "Will read columns from {} from s3", current_object_info->getPath()); compression_method = chooseCompressionMethod(filename, configuration->compression_method); read_buf = object_storage->readObject( StoredObject(current_object_info->getPath()), From 2bffc72d64e62f9f5ddb177f4b617bcc6d2c6253 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 27 May 2024 10:57:26 +0000 Subject: [PATCH 647/651] Fix optimize_aggregation_in_order setting --- .../queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh index c433d409c7c..b8760ec0e1d 100755 --- a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh @@ -8,7 +8,7 @@ CLICKHOUSE_LOG_COMMENT= . 
"$CUR_DIR"/../shell_config.sh # Fix some settings to avoid timeouts because of some settings randomization -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128 --optimize_aggregation_in_order 0" function test() { From ed6994d372b636b4981593303e8dfde654bc151b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 27 May 2024 13:01:35 +0200 Subject: [PATCH 648/651] Clean settings in 02943_variant_read_subcolumns test --- tests/queries/0_stateless/02943_variant_read_subcolumns.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh index 6bbd127d933..5ca8dd5f36f 100755 --- a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh @@ -7,8 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1 --max_insert_threads 4 --group_by_two_level_threshold 752249 --group_by_two_level_threshold_bytes 15083870 --distributed_aggregation_memory_efficient 1 --fsync_metadata 1 --output_format_parallel_formatting 0 --input_format_parallel_parsing 0 --min_chunk_bytes_for_parallel_parsing 6583861 --max_read_buffer_size 640584 --prefer_localhost_replica 1 --max_block_size 38844 --max_threads 48 --optimize_append_index 0 --optimize_if_chain_to_multiif 1 --optimize_if_transform_strings_to_enum 0 --optimize_read_in_order 1 --optimize_or_like_chain 0 --optimize_substitute_columns 1 --enable_multiple_prewhere_read_steps 1 --read_in_order_two_level_merge_threshold 4 --optimize_aggregation_in_order 0 --aggregation_in_order_max_block_bytes 18284646 --use_uncompressed_cache 1 --min_bytes_to_use_direct_io 10737418240 --min_bytes_to_use_mmap_io 10737418240 --local_filesystem_read_method pread --remote_filesystem_read_method read --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 0 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 --throw_on_error_from_cache_on_write_operations 1 --remote_filesystem_read_prefetch 0 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 128Mi --filesystem_prefetches_limit 0 --filesystem_prefetch_min_bytes_for_single_read_task 16Mi --filesystem_prefetch_step_marks 50 --filesystem_prefetch_step_bytes 0 --compile_aggregate_expressions 1 --compile_sort_description 0 --merge_tree_coarse_index_granularity 31 --optimize_distinct_in_order 1 --max_bytes_before_external_sort 1 --max_bytes_before_external_group_by 1 --max_bytes_before_remerge_sort 2640239625 --min_compress_block_size 3114155 --max_compress_block_size 226550 --merge_tree_compact_parts_min_granules_to_multibuffer_read 118 --optimize_sorting_by_input_stream_properties 0 --http_response_buffer_size 543038 --http_wait_end_of_query False --enable_memory_bound_merging_of_aggregation_results 1 --min_count_to_compile_expression 3 --min_count_to_compile_aggregate_expression 3 --min_count_to_compile_sort_description 0 --session_timezone 
-
+CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1"
 
 function test()
 {

From 747f6ae39c98d2caac1ddd6f5958aecc7bb92e22 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 27 May 2024 12:52:44 +0000
Subject: [PATCH 649/651] Add a comment after #64226

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 3fca66e6eb8..43edaaa53fd 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -3916,6 +3916,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi
             return array_join_column;
         }
 
+        /// Resolve subcolumns. Example : SELECT x.y.z FROM tab ARRAY JOIN arr AS x
         auto compound_expr = tryResolveIdentifierFromCompoundExpression(
             identifier_lookup.identifier,
             identifier_lookup.identifier.getPartsSize() - identifier_view.getPartsSize() /*identifier_bind_size*/,

From 8166da7fbb616d9fa2d779ffe8e533b238d3680e Mon Sep 17 00:00:00 2001
From: Blargian
Date: Mon, 27 May 2024 16:21:36 +0200
Subject: [PATCH 650/651] Incorporate review changes

---
 .../functions/type-conversion-functions.md    | 124 +++++-------------
 1 file changed, 30 insertions(+), 94 deletions(-)

diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 2360cecb9a5..c4e0b2946c4 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -998,7 +998,7 @@ Result:
 
 ## reinterpretAsUInt8
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt8.
+Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsUInt8(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as UInt8.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as UInt8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1040,7 +1036,7 @@ Result:
 
 ## reinterpretAsUInt16
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt16.
+Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsUInt16(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as UInt16.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as UInt16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1082,7 +1074,7 @@ Result:
 
 ## reinterpretAsUInt32
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt32.
+Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsUInt32(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as UInt32.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as UInt32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1124,7 +1112,7 @@ Result:
 
 ## reinterpretAsUInt64
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt64.
+Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsUInt64(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as UInt64.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as UInt64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1166,7 +1150,7 @@ Result:
 
 ## reinterpretAsUInt128
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt128.
+Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsUInt128(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as UInt64.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as UInt128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1208,7 +1188,7 @@ Result:
 
 ## reinterpretAsUInt256
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt256.
+Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsUInt256(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as UInt256.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as UInt256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1250,7 +1226,7 @@ Result:
 
 ## reinterpretAsInt8
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int8.
+Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsInt8(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as Int8.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as Int8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1292,7 +1264,7 @@ Result:
 
 ## reinterpretAsInt16
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int16.
+Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsInt16(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as Int16.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as Int16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1334,7 +1302,7 @@ Result:
 
 ## reinterpretAsInt32
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int32.
+Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsInt32(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as Int32.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as Int32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1376,7 +1340,7 @@ Result:
 
 ## reinterpretAsInt64
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int64.
+Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsInt64(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as Int64.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as Int64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1418,7 +1378,7 @@ Result:
 
 ## reinterpretAsInt128
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int128.
+Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsInt128(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as Int128.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as Int128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1460,7 +1416,7 @@ Result:
 
 ## reinterpretAsInt256
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int256.
+Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsInt256(x)
 ```
 
 **Parameters**
 
-- `x`: value to byte reinterpret as Int256.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to byte reinterpret as Int256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1502,7 +1454,7 @@ Result:
 
 ## reinterpretAsFloat32
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Float32.
+Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsFloat32(x)
 ```
 
 **Parameters**
 
-- `x`: value to reinterpret as Float32.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to reinterpret as Float32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1540,7 +1488,7 @@ Result:
 
 ## reinterpretAsFloat64
 
-Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Float64.
+Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
 
 **Syntax**
 
 ```sql
 reinterpretAsFloat64(x)
 ```
 
 **Parameters**
 
-- `x`: value to reinterpret as Float64.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: value to reinterpret as Float64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1588,11 +1532,7 @@ reinterpretAsDate(x)
 
 **Parameters**
 
-- `x`: number of days since the beginning of the Unix Epoch.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: number of days since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
 
@@ -1632,11 +1572,7 @@ reinterpretAsDateTime(x)
 
 **Parameters**
 
-- `x`: number of seconds since the beginning of the Unix Epoch.
-
-:::note
-Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
-:::
+- `x`: number of seconds since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
 
 **Returned value**
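Before the final CI patch, a clarification that may help readers of the documentation rewrite above: "byte reinterpretation" is a raw-byte copy with no value-preserving conversion, and for `reinterpretAsDate`/`reinterpretAsDateTime` the copied integer is then read as days, respectively seconds, since the Unix epoch. The sketch below is a hypothetical standalone C++ analogue for intuition only, not ClickHouse's implementation:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <ctime>

// Raw-byte reinterpretation: copy min(sizeof(To), sizeof(From)) bytes and
// zero-pad the rest. No numeric conversion happens, which is why the docs
// warn that the result is meaningless when the target type cannot represent
// the input.
template <typename To, typename From>
To reinterpretAs(const From & from)
{
    To to{};
    std::memcpy(&to, &from, std::min(sizeof(To), sizeof(From)));
    return to;
}

int main()
{
    // Analogue of reinterpretAsUInt8(257): only the lowest byte survives on a
    // little-endian machine, so this prints 1 (a CAST would saturate or throw).
    std::printf("%u\n", unsigned{reinterpretAs<uint8_t>(uint32_t{257})});

    // Analogue of reinterpretAsDate(1) and reinterpretAsDateTime(3600): the
    // raw number is interpreted as days, respectively seconds, since the epoch.
    time_t day = static_cast<time_t>(reinterpretAs<uint16_t>(uint16_t{1})) * 86400;
    time_t sec = static_cast<time_t>(reinterpretAs<uint32_t>(uint32_t{3600}));

    char buf[32];
    std::strftime(buf, sizeof(buf), "%Y-%m-%d", std::gmtime(&day));
    std::printf("%s\n", buf); // 1970-01-02
    std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::gmtime(&sec));
    std::printf("%s\n", buf); // 1970-01-01 01:00:00
    return 0;
}
```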
From 9eb79530f4b40d0f0dcef4ecd82da97e5136a4bf Mon Sep 17 00:00:00 2001
From: Max K
Date: Mon, 27 May 2024 17:35:42 +0200
Subject: [PATCH 651/651] CI: fix build_report selection in case of job reuse

---
 tests/ci/report.py | 50 +++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/tests/ci/report.py b/tests/ci/report.py
index 8676c998afb..670a10f4561 100644
--- a/tests/ci/report.py
+++ b/tests/ci/report.py
@@ -401,30 +401,40 @@ class BuildResult:
     @classmethod
     def load_any(cls, build_name: str, pr_number: int, head_ref: str):  # type: ignore
         """
-        loads report from suitable report file with the following priority:
-        1. report from PR with the same @pr_number
-        2. report from branch with the same @head_ref
-        3. report from the master
-        4. any other report
+        loads a build report from one of the available report files (matching the job digest)
+        with the following priority:
+        1. report for the current PR @pr_number (might happen in a PR's workflow with or without job reuse)
+        2. report for the current branch @head_ref (might happen in a release/master workflow with or without job reuse)
+        3. report for the master branch (might happen in any workflow in case of job reuse)
+        4. any other report (job reuse from another PR, if the master report is not available yet)
        """
-        reports = []
+        pr_report = None
+        ref_report = None
+        master_report = None
+        any_report = None
         for file in Path(REPORT_PATH).iterdir():
             if f"{build_name}.json" in file.name:
-                reports.append(file)
-        if not reports:
-            return None
-        file_path = None
-        for file in reports:
-            if pr_number and f"_{pr_number}_" in file.name:
-                file_path = file
-                break
-            if f"_{head_ref}_" in file.name:
-                file_path = file
-                break
+                any_report = file
                 if "_master_" in file.name:
-                file_path = file
-                break
+                    master_report = file
+                elif f"_{head_ref}_" in file.name:
+                    ref_report = file
+                elif pr_number and f"_{pr_number}_" in file.name:
+                    pr_report = file
+
+        if not any_report:
+            return None
+
+        if pr_report:
+            file_path = pr_report
+        elif ref_report:
+            file_path = ref_report
+        elif master_report:
+            file_path = master_report
+        else:
+            file_path = any_report
+
+        return cls.load_from_file(file_path)
 
     @classmethod
     def load_from_file(cls, file: Union[Path, str]):  # type: ignore
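The selection logic in the patch above scans the report directory once, remembers the last candidate seen in each bucket, and then picks in the order PR > branch > master > anything. A language-agnostic restatement for illustration — the CI code itself is the Python above, and the file names here are made up:

```cpp
#include <cstdio>
#include <optional>
#include <string>
#include <vector>

// Remember the last candidate seen in each bucket while scanning once,
// then pick by priority: current PR > current branch > master > any report.
std::optional<std::string> pickReport(
    const std::vector<std::string> & files,
    const std::string & pr_number,
    const std::string & head_ref)
{
    std::optional<std::string> pr, ref, master, any;
    for (const auto & name : files)
    {
        any = name;
        if (name.find("_master_") != std::string::npos)
            master = name;
        else if (name.find("_" + head_ref + "_") != std::string::npos)
            ref = name;
        else if (!pr_number.empty() && name.find("_" + pr_number + "_") != std::string::npos)
            pr = name;
    }
    if (pr) return pr;          // 1. report from the current PR
    if (ref) return ref;        // 2. report from the current branch
    if (master) return master;  // 3. report from master
    return any;                 // 4. any other report (or empty if none found)
}

int main()
{
    // Hypothetical file names: the PR-specific report wins over the master one.
    auto best = pickReport(
        {"build_report_master_binary.json", "build_report_12345_binary.json"},
        "12345",
        "my-feature-branch");
    std::printf("%s\n", best ? best->c_str() : "none"); // build_report_12345_binary.json
    return 0;
}
```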